diff --git a/modules.json b/modules.json
index 03b49c8c..2adc3056 100644
--- a/modules.json
+++ b/modules.json
@@ -36,6 +36,17 @@
                     }
                 }
             }
+        },
+        "https://github.com/bigbio/nf-modules.git": {
+            "modules": {
+                "bigbio": {
+                    "thermorawfileparser": {
+                        "branch": "main",
+                        "git_sha": "f85bcb529b16e03e8f46374d0fde5bfaf604b676",
+                        "installed_by": ["modules"]
+                    }
+                }
+            }
         }
     }
 }
diff --git a/modules/bigbio/thermorawfileparser/environment.yml b/modules/bigbio/thermorawfileparser/environment.yml
new file mode 100644
index 00000000..63b8fc64
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/environment.yml
@@ -0,0 +1,7 @@
+name: thermorawfileparser
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::thermorawfileparser=1.4.5
diff --git a/modules/bigbio/thermorawfileparser/main.nf b/modules/bigbio/thermorawfileparser/main.nf
new file mode 100644
index 00000000..31ce4d0b
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/main.nf
@@ -0,0 +1,69 @@
+process THERMORAWFILEPARSER {
+    tag "$meta.mzml_id"
+    label 'process_low'
+    label 'process_single'
+    label 'error_retry'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.5--h05cac1d_1' :
+        'biocontainers/thermorawfileparser:1.4.5--h05cac1d_1' }"
+
+    // Input staging strategy changes with the retry attempt:
+    // attempt 1 = link (symlink on awsbatch), attempt 2 = symlink (copy on awsbatch), later attempts = copy
+    stageInMode {
+        if (task.attempt == 1) {
+            if (task.executor == "awsbatch") {
+                'symlink'
+            } else {
+                'link'
+            }
+        } else if (task.attempt == 2) {
+            if (task.executor == "awsbatch") {
+                'copy'
+            } else {
+                'symlink'
+            }
+        } else {
+            'copy'
+        }
+    }
+    input:
+    tuple val(meta), path(rawfile)
+
+    output:
+    tuple val(meta), path("*.{mzML,mgf,parquet}"), emit: convert_files
+    path "versions.yml", emit: versions
+    path "*.log", emit: log
+
+    script:
+    def args = task.ext.args ?: ''
+    // Default to indexed mzML (-f=2) unless args already select a format via -f= or --format=
+    def formatArg = (args =~ /(?:^|\s)(?:-f|--format)=/).find() ? '' : '-f=2'
+
+    """
+    ThermoRawFileParser.sh -i='${rawfile}' ${formatArg} ${args} -o=./ 2>&1 | tee '${rawfile.baseName}_conversion.log'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ThermoRawFileParser: \$(ThermoRawFileParser.sh --version)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}"
+    def args = task.ext.args ?: ''
+    // Pick the stub extension from the requested format: 0 = MGF, 3 = Parquet;
+    // anything else (1, 2, 4 or no format given) falls back to mzML so the stub always creates a file
+    def outputExt = (args =~ /(?:-f|--format)=0\b/).find() ? 'mgf' : ((args =~ /(?:-f|--format)=3\b/).find() ? 'parquet' : 'mzML')
+
+    """
+    touch '${prefix}.${outputExt}'
+    touch '${prefix}_conversion.log'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ThermoRawFileParser: \$(ThermoRawFileParser.sh --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/bigbio/thermorawfileparser/meta.yml b/modules/bigbio/thermorawfileparser/meta.yml
new file mode 100644
index 00000000..0ab076f9
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/meta.yml
@@ -0,0 +1,49 @@
+name: thermorawfileparser
+description: Convert RAW file to mzML, MGF or Parquet files
+keywords:
+  - raw
+  - mzML
+  - MGF
+  - OpenMS
+tools:
+  - thermorawfileparser:
+      description: |
+        ThermoRawFileParser converts Thermo RAW files to open standard formats like mzML, producing indexed output files.
+        Use `task.ext.args` to pass additional arguments, e.g.:
+        - `-f=0` for MGF output, `-f=1` for mzML, `-f=2` for indexed mzML (default), `-f=3` for Parquet, `-f=4` for None
+        - `-L` or `--msLevel=VALUE` to select MS levels (e.g., `-L=1,2` or `--msLevel=1-3`)
+      homepage: https://github.com/compomics/ThermoRawFileParser
+      documentation: https://github.com/compomics/ThermoRawFileParser
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+  - rawfile:
+      type: file
+      description: |
+        Thermo RAW file
+      pattern: "*.{raw,RAW}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'sample1', mzml_id:'UPS1_50amol_R3' ]
+  - convert_files:
+      type: file
+      description: |
+        Converted files in mzML, MGF or Parquet format depending on the format parameter (-f).
+        Format options: 0 for MGF, 1 for mzML, 2 for indexed mzML (default), 3 for Parquet, 4 for None.
+      pattern: "*.{mzML,mgf,parquet}"
+  - log:
+      type: file
+      description: log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
+  - "@ypriverol"
diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test b/modules/bigbio/thermorawfileparser/tests/main.nf.test
new file mode 100644
index 00000000..355fbb15
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test
@@ -0,0 +1,53 @@
+nextflow_process {
+
+    name "Test Process THERMORAWFILEPARSER"
+    script "../main.nf"
+    process "THERMORAWFILEPARSER"
+    tag "modules"
+    tag "modules_bigbio"
+    tag "thermorawfileparser"
+
+    test("Should convert RAW to mzML") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', mzml_id: 'UPS1_50amol_R3' ],
+                    file(params.test_data['proteomics']['msspectra']['ups1_50amol_r3'], checkIfExists: false)
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out.versions).match("versions")
+            assert new File(process.out.convert_files[0][1]).name == 'TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML'
+            assert process.out.log.size() == 1
+        }
+    }
+
+    test("Should run stub mode") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', mzml_id: 'test_sample' ],
+                    file(params.test_data['proteomics']['msspectra']['ups1_50amol_r3'], checkIfExists: false)
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out.versions).match("versions_stub")
+            assert new File(process.out.convert_files[0][1]).name == 'test_sample.mzML'
+            assert process.out.log.size() == 1
+        }
+    }
+}
diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap
new file mode 100644
index 00000000..6562491e
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.04.8"
+        },
+        "timestamp": "2025-12-11T06:27:00.000000"
+    },
+    "versions_stub": {
+        "content": [
+            [
+                "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.04.8"
+        },
+        "timestamp": "2025-12-11T06:27:00.000000"
+    }
+}
diff --git a/modules/bigbio/thermorawfileparser/tests/nextflow.config b/modules/bigbio/thermorawfileparser/tests/nextflow.config
new file mode 100644
index 00000000..0293c16f
--- /dev/null
+++ b/modules/bigbio/thermorawfileparser/tests/nextflow.config
@@ -0,0 +1,3 @@
+process {
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+}