diff --git a/.gitignore b/.gitignore index 0eb3ce9..8f22c8c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ testing* null/ .idea/ test_data +.nf-test/ +.nf-test.log diff --git a/modules.json b/modules.json index 32eed01..340e156 100644 --- a/modules.json +++ b/modules.json @@ -5,10 +5,40 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "catpack/addnames": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "catpack/bins": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "catpack/download": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "catpack/prepare": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "catpack/summarise": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "e10b76ca0c66213581bec2833e30d31f239dec0b", "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "00ee87ebb541af0008596400ce6d5f66d79d5408", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/catpack/addnames/environment.yml b/modules/nf-core/catpack/addnames/environment.yml new file mode 100644 index 0000000..39264bf --- /dev/null +++ b/modules/nf-core/catpack/addnames/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/addnames/main.nf b/modules/nf-core/catpack/addnames/main.nf new file mode 100644 index 0000000..2854e24 --- /dev/null +++ b/modules/nf-core/catpack/addnames/main.nf @@ -0,0 +1,57 @@ +process CATPACK_ADDNAMES 
{ // Runs `CAT_pack add_names` on a CAT/BAT/RAT output file and writes ${prefix}.txt + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(input) + tuple val(meta2), path(taxonomy) + + output: + tuple val(meta), path("${prefix}.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no 'def': prefix must stay visible to the output: block + if ("${input}" == "${prefix}.txt") { // the output name would collide with the staged input file + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + CAT_pack add_names \\ + ${args} \\ + -i ${input} \\ + -t ${taxonomy} \\ + -o ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no 'def': prefix must stay visible to the output: block + """ + echo "CAT_pack add_names \\ + ${args} \\ + -i ${input} \\ + -t ${taxonomy} \\ + -o ${prefix}.txt" + + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/addnames/meta.yml b/modules/nf-core/catpack/addnames/meta.yml new file mode 100644 index 0000000..3373c14 --- /dev/null +++ b/modules/nf-core/catpack/addnames/meta.yml @@ -0,0 +1,75 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_addnames" +description: Taxonomic classification of long DNA sequences and metagenome assembled + genomes (e.g. MAGs / bins). 
+keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - input: + type: file + description: Classification or ORF2LCA output file from CAT/BAT/RAT + pattern: "*.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - taxonomy: + type: directory + description: "Directory containing taxonomy files: names.dmp, nodes.dmp, acc2taxid.txt" + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + +output: + txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - ${prefix}.txt: + type: file + description: | + CAT/BAT/RAT classification file with added taxonomic names + pattern: "*.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/addnames/tests/main.nf.test b/modules/nf-core/catpack/addnames/tests/main.nf.test new file mode 100644 index 0000000..0e50115 --- /dev/null +++ b/modules/nf-core/catpack/addnames/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process CATPACK_ADDNAMES" + script "../main.nf" + process "CATPACK_ADDNAMES" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/addnames" + tag "catpack/prepare" + tag "catpack/contigs" + + setup { + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + } + + 
test("sarscov2 - genome - fasta") { + + when { + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - genome -fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/addnames/tests/main.nf.test.snap b/modules/nf-core/catpack/addnames/tests/main.nf.test.snap new file mode 100644 index 0000000..1cb588a --- /dev/null +++ b/modules/nf-core/catpack/addnames/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - genome -fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f84de68b986e9bf88446b5077d33ddaa" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f84de68b986e9bf88446b5077d33ddaa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T09:37:47.561657815" + }, + "sarscov2 - genome - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,bf9914b7772f148d4ed2604a5eba680f" + ] + ], + "1": [ + "versions.yml:md5,f84de68b986e9bf88446b5077d33ddaa" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt:md5,bf9914b7772f148d4ed2604a5eba680f" + ] + ], + "versions": [ + "versions.yml:md5,f84de68b986e9bf88446b5077d33ddaa" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T09:36:41.022309088" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/catpack/addnames/tests/nextflow.config b/modules/nf-core/catpack/addnames/tests/nextflow.config new file mode 100644 index 0000000..07f3352 --- /dev/null +++ b/modules/nf-core/catpack/addnames/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } +} diff --git a/modules/nf-core/catpack/bins/environment.yml b/modules/nf-core/catpack/bins/environment.yml new file mode 100644 index 0000000..39264bf --- /dev/null +++ b/modules/nf-core/catpack/bins/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/bins/main.nf b/modules/nf-core/catpack/bins/main.nf new file mode 100644 index 0000000..b9555d8 --- /dev/null +++ b/modules/nf-core/catpack/bins/main.nf @@ -0,0 +1,68 @@ +process CATPACK_BINS { // Runs `CAT_pack bins` on the FASTA files staged under bins/ + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(bins, stageAs: 'bins/*') + tuple val(meta2), path(database) + tuple val(meta3), path(taxonomy) + tuple val(meta4), path(proteins) + tuple val(meta5), path(diamond_table) + val(bin_suffix) + + output: + tuple val(meta), path("*.ORF2LCA.txt"), emit: orf2lca + tuple val(meta), path("*.bin2classification.txt"), emit: bin2classification + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.diamond"), optional: true, emit: diamond + tuple val(meta), path("*.predicted_proteins.faa"), optional: true, emit: faa + tuple val(meta), path("*.gff"), optional: true, emit: gff + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def premade_proteins = proteins ? "-p ${proteins}" : '' // an empty path list is falsy, so the flag is dropped when no FASTA is supplied + def premade_table = diamond_table ? 
"-a ${diamond_table}" : '' // -a/--diamond_alignment; -d is already taken by the database folder below + """ + CAT_pack bins \\ + -n ${task.cpus} \\ + -b bins/ \\ + -d ${database} \\ + -t ${taxonomy} \\ + -s ${bin_suffix} \\ + ${premade_proteins} \\ + ${premade_table} \\ + -o ${prefix} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ORF2LCA.txt + touch ${prefix}.bin2classification.txt + touch ${prefix}.log + touch ${prefix}.diamond + touch ${prefix}.predicted_proteins.faa + touch ${prefix}.predicted_proteins.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/bins/meta.yml b/modules/nf-core/catpack/bins/meta.yml new file mode 100644 index 0000000..257e27c --- /dev/null +++ b/modules/nf-core/catpack/bins/meta.yml @@ -0,0 +1,174 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_bins" +description: Taxonomic classification of long DNA sequences and metagenome assembled + genomes (e.g. MAGs / bins). +keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - bins: + type: file + description: One or more nucleotide FASTA file containing binned long DNA sequences. 
+ pattern: "*.{fasta,fna,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - database: + type: directory + description: Directory containing CAT_pack database files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - taxonomy: + type: directory + description: Directory containing CAT_pack taxonomy files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - proteins: + type: file + description: Optional pre-made predicted proteins FASTA + pattern: "*.{fasta,faa,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - diamond_table: + type: file + description: Optional pre-made DIAMOND alignment table + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_3751" + + - bin_suffix: + type: string + description: Suffix to search for in the input files when `bins` is a directory. + +output: + orf2lca: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.ORF2LCA.txt": + type: file + description: A TSV file with per-ORF hit stats and identified lineage + pattern: "*.ORF2LCA.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + bin2classification: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.bin2classification.txt": + type: file + description: A TSV file with per-bin hit stats and assignment justification + information + pattern: "*.bin2classification.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.log": + type: file + description: Log file with run messages and basic statistics + ontologies: + - edam: "http://edamontology.org/format_2330" + diamond: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.diamond": + type: file + description: Intermediate DIAMOND TSV summary output file with alignment results + ontologies: + - edam: "http://edamontology.org/format_3475" + faa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.predicted_proteins.faa": + type: file + description: FAA file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_1929" + gff: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.gff": + type: file + description: GFF file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_2305" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/bins/tests/main.nf.test b/modules/nf-core/catpack/bins/tests/main.nf.test new file mode 100644 index 0000000..d94a95d --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/main.nf.test @@ -0,0 +1,138 @@ +nextflow_process { + + name "Test Process CATPACK_BINS" + script "../main.nf" + process "CATPACK_BINS" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/bins" + tag "catpack/prepare" + tag "catpack/contigs" + + setup { + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - genome - fasta") { + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = '.fasta' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + 
process.out.orf2lca, + process.out.bin2classification, + process.out.diamond, + process.out.faa, + process.out.gff, + process.out.versions, + path(process.out.log.get(0).get(1)).readLines().last().contains("CAT is done!") // NOTE(review): main.nf.test.snap records this value as false, so the snapshot pins a FAILED log check; confirm the expected last log line + ).match() + } + ) + } + + } + + test("sarscov2 - genome - fasta - premade proteins") { + + setup { + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + } + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = CATPACK_CONTIGS.out.faa + input[4] = [[:], []] + input[5] = '.fasta' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + process.out.orf2lca, + process.out.bin2classification, + process.out.diamond, + process.out.gff, + process.out.versions, + path(process.out.log.get(0).get(1)).readLines().last().contains("CAT is done!") // NOTE(review): also snapshotted as false; see the note on the first test + ).match() + } + ) + } + + } + + test("sarscov2 - genome - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = '.fasta' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/bins/tests/main.nf.test.snap b/modules/nf-core/catpack/bins/tests/main.nf.test.snap new file mode 100644 index 
0000000..e3c80d7 --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/main.nf.test.snap @@ -0,0 +1,208 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,27e3ca35dc7b977653b5bbf18076fc26" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,8cb8c364c1dea229f68e8c7a1747205d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.alignment.diamond:md5,9e2f9c188b183c18dd9572395a48a066" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.predicted_proteins.faa:md5,1f8550f87d044d117422ca02827e4d18" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.predicted_proteins.gff:md5,cb63331a0282175669107585cf4a66c1" + ] + ], + [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + false + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T08:19:03.528409783" + }, + "sarscov2 - genome - fasta - premade proteins": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,27e3ca35dc7b977653b5bbf18076fc26" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,8cb8c364c1dea229f68e8c7a1747205d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.alignment.diamond:md5,9e2f9c188b183c18dd9572395a48a066" + ] + ], + [ + + ], + [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + false + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T06:02:16.99538445" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": 
[ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + "bin2classification": [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "diamond": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "orf2lca": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T05:48:35.765674003" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/bins/tests/nextflow.config b/modules/nf-core/catpack/bins/tests/nextflow.config new file mode 100644 index 0000000..c1ac485 --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } + + withName: CATPACK_BINS { + ext.args = "--bin_suffix .fasta" + } + + withName: CATPACK_ADDNAMES { + ext.args = "--only_official" + } +} diff --git a/modules/nf-core/catpack/download/environment.yml b/modules/nf-core/catpack/download/environment.yml new file mode 100644 index 0000000..39264bf --- /dev/null +++ b/modules/nf-core/catpack/download/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/download/main.nf b/modules/nf-core/catpack/download/main.nf new file mode 100644 index 0000000..3c9f85e --- /dev/null +++ b/modules/nf-core/catpack/download/main.nf @@ -0,0 +1,63 @@ +process CATPACK_DOWNLOAD { // Runs `CAT_pack download` for the requested database (${db}) into ${prefix}/ + tag "${meta.id}" + label 'process_single' + label 'process_long' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), val(db) + + output: + tuple val(meta), path("${prefix}/*.${db}.gz"), emit: fasta + tuple val(meta), path("${prefix}/*.names.dmp"), emit: names + tuple val(meta), path("${prefix}/*.nodes.dmp"), emit: nodes + tuple val(meta), path("${prefix}/*accession2taxid*.gz"), emit: acc2tax + tuple val(meta), path("${prefix}/*.log"), emit: log + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no 'def': prefix must stay visible to the output: block + """ + CAT_pack \\ + download \\ + ${args} \\ + --db ${db} \\ + -o ${prefix}/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no 'def': prefix must stay visible to the output: block + """ + echo "CAT_pack \\ + download \\ + ${args} \\ + --db ${db} \\ + -o ${prefix}/" + + mkdir ${prefix}/ + echo "" | gzip > ${prefix}/${prefix}.${db}.gz + touch ${prefix}/${prefix}.names.dmp + touch ${prefix}/${prefix}.nodes.dmp + echo "" | gzip > ${prefix}/${prefix}.accession2taxid.gz + touch ${prefix}/${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: 
\$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/download/meta.yml b/modules/nf-core/catpack/download/meta.yml new file mode 100644 index 0000000..041e0cd --- /dev/null +++ b/modules/nf-core/catpack/download/meta.yml @@ -0,0 +1,104 @@ +name: "catpack_download" +description: Downloads the required files for either Nr or GTDB for building into + a CAT database +keywords: + - taxonomic classification + - classification + - database + - download +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db: + type: string + description: Which database to download + pattern: "nr|GTDB" + +output: + fasta: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/*.${db}.gz: + type: file + description: FASTA file containing all the NCBI NR or GTDB sequences + pattern: "*.${db}.gz" + ontologies: + - edam: "http://edamontology.org/format_1929" + names: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/*.names.dmp: + type: file + description: NCBI taxonomy-style names.dmp text file + pattern: "*.names.dmp" + ontologies: + - edam: "http://edamontology.org/format_1964" + nodes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - ${prefix}/*.nodes.dmp: + type: file + description: NCBI taxonomy-style nodes.dmp text file + pattern: "*.nodes.dmp" + ontologies: + - edam: "http://edamontology.org/format_1964" + acc2tax: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/*accession2taxid*.gz: + type: file + description: NCBI taxonomy names accession to taxonomy file + pattern: "*accession2taxid*" + ontologies: + - edam: "http://edamontology.org/format_1964" + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/*.log: + type: file + description: Log file of the download process + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_1964" + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/download/tests/main.nf.test b/modules/nf-core/catpack/download/tests/main.nf.test new file mode 100644 index 0000000..cc9aae7 --- /dev/null +++ b/modules/nf-core/catpack/download/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process CATPACK_DOWNLOAD" + script "../main.nf" + process "CATPACK_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/download" + + // Stub-only test: the real module downloads extremely large database files + test("nr - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'nr', single_end:false ], // meta map + 'nr', + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/download/tests/main.nf.test.snap 
b/modules/nf-core/catpack/download/tests/main.nf.test.snap new file mode 100644 index 0000000..ddd25b0 --- /dev/null +++ b/modules/nf-core/catpack/download/tests/main.nf.test.snap @@ -0,0 +1,109 @@ +{ + "nr - stub": { + "content": [ + { + "0": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.nr.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.accession2taxid.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,e1493fe75b3b8cc2bc0cbd1d5ddfad44" + ], + "acc2tax": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.accession2taxid.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "fasta": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.nr.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "names": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "nodes": [ + [ + { + "id": "nr", + "single_end": false + }, + "nr.nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1493fe75b3b8cc2bc0cbd1d5ddfad44" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-22T01:16:45.367749401" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/prepare/environment.yml b/modules/nf-core/catpack/prepare/environment.yml new file mode 100644 index 0000000..39264bf --- /dev/null +++ b/modules/nf-core/catpack/prepare/environment.yml @@ -0,0 +1,7 @@ 
+--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/prepare/main.nf b/modules/nf-core/catpack/prepare/main.nf new file mode 100644 index 0000000..927e4c8 --- /dev/null +++ b/modules/nf-core/catpack/prepare/main.nf @@ -0,0 +1,62 @@ +process CATPACK_PREPARE { + tag "${meta.id}" + label 'process_medium' + label 'process_long' + label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(db_fasta) + path names + path nodes + path acc2tax + + output: + tuple val(meta), path("${prefix}/db/"), emit: db + tuple val(meta), path("${prefix}/tax/"), emit: taxonomy + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + CAT_pack prepare \\ + -n ${task.cpus} \\ + --db_fasta ${db_fasta} \\ + --names ${names} \\ + --nodes ${nodes} \\ + --acc2tax ${acc2tax} \\ + --db_dir ${prefix}/ \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch database.log + mkdir -p ${prefix}/db + touch ${prefix}/db/database.dmnd + touch ${prefix}/db/database.fastaid2LCAtaxid + touch ${prefix}/db/database.taxids_with_multiple_offspring + mkdir -p ${prefix}/tax + touch ${prefix}/tax/nodes.dmp + touch ${prefix}/tax/names.dmp + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + 
END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/prepare/meta.yml b/modules/nf-core/catpack/prepare/meta.yml new file mode 100644 index 0000000..c6f3235 --- /dev/null +++ b/modules/nf-core/catpack/prepare/meta.yml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_prepare" +description: Creates a CAT_pack database based on input FASTAs +keywords: + - catpack + - cat + - prepare + - database + - profiling + - build +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db_fasta: + type: file + description: A FASTA file containing all sequences to be included in the database + pattern: "*.{fasta,fa,fna}" + ontologies: + - edam: "http://edamontology.org/format_1929" + - names: + type: file + description: An NCBI taxonomy-style names text file + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + - nodes: + type: file + description: An NCBI taxonomy-style nodes text file + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + - acc2tax: + type: file + description: An NCBI taxonomy names accession to taxonomy file + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + +output: + db: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - ${prefix}/db/: + type: directory + description: Directory containing CAT database files + pattern: "${db}/" + ontologies: + - edam: "http://edamontology.org/data_1049" + taxonomy: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/tax/: + type: directory + description: Directory containing CAT prepared taxonomy database files + pattern: "${db}/" + ontologies: + - edam: "http://edamontology.org/data_1049" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/prepare/tests/main.nf.test b/modules/nf-core/catpack/prepare/tests/main.nf.test new file mode 100644 index 0000000..4e96ad9 --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process CATPACK_PREPARE" + script "../main.nf" + process "CATPACK_PREPARE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/prepare" + + test("metagenome -sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + + then { + def stablefiles = [] + file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (!file.isDirectory() && 
!["database.dmnd", "database.log", "database.fastaid2LCAtaxid", "database.taxids_with_multiple_offspring"].find {file.toString().endsWith(it)}) {stablefiles.add(file)} } + def unstablefiles = [] + file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (["database.dmnd", "database.log", "database.fastaid2LCAtaxid", "database.taxids_with_multiple_offspring"].find {file.toString().endsWith(it)}) {unstablefiles.add(file.getName().toString())} } + assertAll( + { assert process.success }, + { assert snapshot( + stablefiles.sort(), + unstablefiles.sort(), + process.out.taxonomy, + process.out.versions + ).match() } + ) + } + + } + + test("metagenome -sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/catpack/prepare/tests/main.nf.test.snap b/modules/nf-core/catpack/prepare/tests/main.nf.test.snap new file mode 100644 index 0000000..0a8975f --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/main.nf.test.snap @@ -0,0 +1,96 @@ +{ + "metagenome -sarscov2 - fasta": { + "content": [ + [ + + ], + [ + "database.dmnd", + "database.fastaid2LCAtaxid", + "database.taxids_with_multiple_offspring" + ], + [ + [ + { + "id": "test" + }, + [ + "names.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3", + "nodes.dmp:md5,130f9132095562e09c732679c562f5e9" + ] + ] 
+ ], + [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-25T16:19:17.838393903" + }, + "metagenome -sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "database.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.fastaid2LCAtaxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.taxids_with_multiple_offspring:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "database.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.fastaid2LCAtaxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.taxids_with_multiple_offspring:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "taxonomy": [ + [ + { + "id": "test" + }, + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-19T07:53:12.595910196" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/prepare/tests/nextflow.config b/modules/nf-core/catpack/prepare/tests/nextflow.config new file mode 100644 index 0000000..07f3352 --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } +} diff --git a/modules/nf-core/catpack/summarise/environment.yml b/modules/nf-core/catpack/summarise/environment.yml new file mode 100644 index 0000000..39264bf --- /dev/null +++ b/modules/nf-core/catpack/summarise/environment.yml @@ -0,0 +1,7 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/summarise/main.nf b/modules/nf-core/catpack/summarise/main.nf new file mode 100644 index 0000000..b2cea90 --- /dev/null +++ b/modules/nf-core/catpack/summarise/main.nf @@ -0,0 +1,62 @@ +process CATPACK_SUMMARISE { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(classification) + tuple val(meta2), path(contigs) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${classification}" == "${prefix}.txt") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + def insert_contigs = contigs ? "-c ${contigs}" : '' + """ + CAT_pack summarise \\ + ${args} \\ + -i ${classification} \\ + ${insert_contigs} \\ + -o ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${classification}" == "${prefix}.txt") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + def insert_contigs = contigs ? 
"-c ${contigs}" : '' + """ + echo "CAT_pack summarise \\ + ${args} \\ + -i ${classification} \\ + ${insert_contigs} \\ + -o ${prefix}.txt" + + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/summarise/meta.yml b/modules/nf-core/catpack/summarise/meta.yml new file mode 100644 index 0000000..444d24c --- /dev/null +++ b/modules/nf-core/catpack/summarise/meta.yml @@ -0,0 +1,76 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_summarise" +description: Summarises results from CAT/BAT/RAT classification steps +keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - classification: + type: file + description: CAT/BAT/RAT classification table annotated with official names + (from CAT_pack addnames) + pattern: "*.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - contigs: + type: file + description: Optional nucleotide FASTA file containing long DNA sequences such + as contigs that were classified (only if classification table is from CAT_pack + contigs) + pattern: "*.{fasta,fna,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + +output: + txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.txt": + type: file + description: | + Summary statistics table of CAT/BAT/RAT results + pattern: "*.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/summarise/tests/main.nf.test b/modules/nf-core/catpack/summarise/tests/main.nf.test new file mode 100644 index 0000000..03ef73b --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/main.nf.test @@ -0,0 +1,159 @@ +nextflow_process { + + name "Test Process CATPACK_SUMMARISE" + script "../main.nf" + process "CATPACK_SUMMARISE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/summarise" + tag "catpack/prepare" + tag "catpack/contigs" + tag "catpack/bins" + tag "catpack/addnames" + + setup { + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - contigs - with names") { + + setup { + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + run('CATPACK_ADDNAMES') { + script '../../addnames/main.nf' + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_ADDNAMES.out.txt + input[1] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bins - raw") { + + setup { + run('CATPACK_BINS') { + script '../../bins/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + input[5] = '.fasta' + """ + } + } + + run('CATPACK_ADDNAMES') { + script '../../addnames/main.nf' + process { + """ + input[0] = CATPACK_BINS.out.bin2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_ADDNAMES.out.txt + input[1] = [[:],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - contigs - stub") { + + options "-stub" + + setup { + run('CATPACK_CONTIGS') { + 
script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/summarise/tests/main.nf.test.snap b/modules/nf-core/catpack/summarise/tests/main.nf.test.snap new file mode 100644 index 0000000..72bd16c --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - contigs - with names": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,f4ae1e889f762d6123e4b8d9e57aa4cc" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,f4ae1e889f762d6123e4b8d9e57aa4cc" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T06:27:47.176876392" + }, + "sarscov2 - contigs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": 
"2025-01-24T06:28:08.710621691" + }, + "sarscov2 - bins - raw": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,2fc7ecc57cfe0c3362fba27e9693d266" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,2fc7ecc57cfe0c3362fba27e9693d266" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T06:27:58.559081161" + } +} diff --git a/modules/nf-core/catpack/summarise/tests/nextflow.config b/modules/nf-core/catpack/summarise/tests/nextflow.config new file mode 100644 index 0000000..0302d9c --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/nextflow.config @@ -0,0 +1,17 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } + + withName: CATPACK_BINS { + ext.args = "--bin_suffix .fasta" + } + + withName: CATPACK_ADDNAMES { + ext.args = "--only_official" + } + + withName: CATPACK_SUMMARISE { + ext.prefix = { "${meta.id}_summary" } + } +} diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..9b926b1 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..e712ebe --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "${archive}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}"), emit: untar + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir ${prefix} + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C ${prefix} --strip-components 1 \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + else + tar \\ + -C ${prefix} \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: (meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..1603e38 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,57 @@ +name: untar +description: Extract files from tar, tar.gz, tar.bz2, tar.xz archives +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar, tar.gz, tar.bz2, tar.xz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untarred + pattern: "*.{tar,tar.gz,tar.bz2,tar.xz}" + ontologies: + - edam: http://edamontology.org/format_3981 # TAR format + - edam: http://edamontology.org/format_3989 # GZIP format +output: + untar: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*/" + - ${prefix}: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..c957517 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..ceb91b7 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + 
"test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 90e0cf9..eecd40b 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -12,6 +12,13 @@ include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pi include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' +// include { CATPACK_PREPARE } from '../modules/nf-core/catpack/prepare/main' +// include { CATPACK_DOWNLOAD } from '../modules/nf-core/catpack/download/main' +// include { CATPACK_ADDNAMES } from '../modules/nf-core/catpack/addnames/main' +// include { CATPACK_BINS } from '../modules/nf-core/catpack/bins/main' +// include { CATPACK_SUMMARISE } from '../modules/nf-core/catpack/summarise/main' +// include { UNTAR } from '../modules/nf-core/untar/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -30,10 +37,120 @@ workflow GENOMESUBMIT { ch_multiqc_files = Channel.empty() // Create channel with meta and fasta - ch_mags = ch_samplesheet - .map { row -> - [ row[0], file(row[1]) ] - } + ch_mags = ch_samplesheet.map { row -> [row[0], file(row[1])] } + + // 
adapted from: + // - https://github.com/nf-core/mag/blob/b0bc5cae64fdd7fa6aec76f270b2daf7882ed84b/subworkflows/local/catpack/main.nf#L15 + // - https://github.com/nf-core/seqsubmit/pull/19/files + + branched = ch_samplesheet.branch { row_items -> + def lineage = row_items[16] + def check_lineage_missing = (!lineage || lineage == []) + incomplete: check_lineage_missing + complete: !check_lineage_missing + } + + // Download catpack database if any lineage values are missing & no db path provided + // if (params.cat_db_path && file(params.cat_db_path).exists()) { + // if (params.cat_db_path.endsWith('.tar.gz')) { + // UNTAR([[id: 'cat_db'], file(params.cat_db, checkIfExists: true)]) + // ch_versions = ch_versions.mix(UNTAR.out.versions) + + // ch_cat_db_dir = UNTAR.out.untar + // } + // else { + // ch_cat_db_dir = Channel.fromPath(params.cat_db, checkIfExists: true, type: 'dir') + // .map { dir -> [[id: 'cat_db'], dir] } + // .first() + // } + + // ch_cat_db = ch_cat_db_dir.multiMap { meta, dir -> + // db: [meta, file(dir / 'db', checkIfExists: true)] + // taxonomy: [meta, file(dir / 'tax', checkIfExists: true)] + // } + // } + // else { + // CATPACK_DOWNLOAD([[id: 'cat_db_nr'], 'nr']) + // ch_versions = ch_versions.mix(CATPACK_DOWNLOAD.out.versions) + + // CATPACK_PREPARE( + // CATPACK_DOWNLOAD.out.fasta, + // CATPACK_DOWNLOAD.out.names.map { _meta, names -> names }, + // CATPACK_DOWNLOAD.out.nodes.map { _meta, nodes -> nodes }, + // CATPACK_DOWNLOAD.out.acc2tax.map { _meta, acc2tax -> acc2tax }, + // ) + // ch_versions = ch_versions.mix(CATPACK_PREPARE.out.versions) + // ch_cat_db = CATPACK_PREPARE.out + // } + + // CATPACK_BINS( + // ch_bins, + // ch_cat_db.db, + // ch_cat_db.taxonomy, + // [[:], []], + // [[:], []], + // '.fa', + // ) + // ch_versions = ch_versions.mix(CATPACK_BINS.out.versions) + + // CATPACK_ADDNAMES(CATPACK_BINS.out.bin2classification, ch_cat_db.taxonomy) + // ch_versions = ch_versions.mix(CATPACK_ADDNAMES.out.versions) + + // bin_summary =
CATPACK_ADDNAMES.out.txt + // .map { _meta, summary -> summary } + // .collectFile( + // name: 'bat_summary.tsv', + // storeDir: "${params.outdir}/Taxonomy/CAT/", + // keepHeader: true, + // ) + + // if (!params.cat_allow_unofficial_lineages) { + // CATPACK_SUMMARISE(CATPACK_ADDNAMES.out.txt, [[:], []]) + // ch_versions = ch_versions.mix(CATPACK_SUMMARISE.out.versions) + // } + + // CHECKM2_PREDICT( + // branched.incomplete.map { row -> + // [row[0], file(row[1])] + // }, + // ch_check2_db, + // ) + + + // checkm2_ver = CHECKM2_PREDICT.out.versions + // .map { yml -> + // yml.readLines()[1].split(': ')[1] + // } + // .map { ver -> "CheckM2_v" + ver } + + + // // Join CheckM2 results with incomplete samples and fill in completeness/contamination + // ch_checkm2_filled = CHECKM2_PREDICT.out.checkm2_tsv + // .map { meta, tsv -> + // def rows = tsv.splitCsv(sep: '\t', header: true) + // // CheckM2 output has one row per genome, extract first row + // def row = rows[0] + // [meta.id, row.Completeness, row.Contamination] + // } + // .combine(checkm2_ver) + // .cross(branched.incomplete.map { row -> [row[0].id, row] }) + // .map { checkm2_result, incomplete_row -> + // def id = checkm2_result[0] + // def completeness = checkm2_result[1] + // def contamination = checkm2_result[2] + // def tool_ver = checkm2_result[3] + // def row = incomplete_row[1] + + // // Fill in col 6 (tool version), col 7 (completeness) and col 8 (contamination) + // row[6] = tool_ver + // row[7] = completeness + // row[8] = contamination + + // row + // } + + // Combine filled incomplete samples with complete samples + // ch_samplesheet = branched.complete.mix(ch_checkm2_filled) // Create TSV with metadata fields ch_remaining_tsv = ch_samplesheet