diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cd2befd..bae2e47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,7 +31,7 @@ jobs:
       matrix:
         NXF_VER:
           - "24.04.1"
-          - "latest-everything"
+          - "latest-stable"
         tags:
           - "workflows"
           - "pipeline"
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
new file mode 100644
index 0000000..929f3a8
--- /dev/null
+++ b/modules/nf-core/samtools/index/main.nf
@@ -0,0 +1,53 @@
+// Runs `samtools index` on one input file and emits whichever index it wrote (.bai, .csi or .crai).
+// All three index outputs are optional; extra command-line flags are read from task.ext.args.
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    // Both branches must resolve to the same samtools build so results do not depend on the container engine.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' :
+        'biocontainers/samtools:1.21--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.bai") , optional:true, emit: bai
+    tuple val(meta), path("*.csi") , optional:true, emit: csi
+    tuple val(meta), path("*.crai"), optional:true, emit: crai
+    path  "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        index \\
+        -@ ${task.cpus} \\
+        $args \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    // Stub picks the index suffix: crai for a .cram input, csi when args contain "-c", otherwise bai.
+    def extension = file(input).getExtension() == 'cram' ?
+                    "crai" : args.contains("-c") ?
"csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..1bed6bc --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,77 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file + ontologies: [] +output: + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 0000000..0ed260e --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 0000000..ca34fb5 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + 
config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 0000000..72d65e8 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + 
+ ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + 
"test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/tests/workflows/sampletracking.nf.test.snap b/tests/workflows/sampletracking.nf.test.snap index 31b5c36..aa7020d 100644 --- a/tests/workflows/sampletracking.nf.test.snap +++ b/tests/workflows/sampletracking.nf.test.snap @@ -34,6 +34,7 @@ ] ], "4": [ + "versions.yml:md5,63915ecd9cc27a8026cca35ada889a22", "versions.yml:md5,dac5a146f64564be62294d29fe21cb67" ], "crosscheck_metrics": [ @@ -68,15 +69,16 @@ ] ], 
"versions": [ + "versions.yml:md5,63915ecd9cc27a8026cca35ada889a22", "versions.yml:md5,dac5a146f64564be62294d29fe21cb67" ] } ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.3" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - "timestamp": "2025-01-21T11:08:41.920631027" + "timestamp": "2025-08-11T15:33:22.286114144" }, "Should run without failures": { "content": [ @@ -121,6 +123,7 @@ ] ], "4": [ + "versions.yml:md5,63915ecd9cc27a8026cca35ada889a22", "versions.yml:md5,7442362a1e457dad7ce796c703bd6380", "versions.yml:md5,ab16dda2f91f60705355cab06b0145bf", "versions.yml:md5,dac5a146f64564be62294d29fe21cb67" @@ -165,6 +168,7 @@ ] ], "versions": [ + "versions.yml:md5,63915ecd9cc27a8026cca35ada889a22", "versions.yml:md5,7442362a1e457dad7ce796c703bd6380", "versions.yml:md5,ab16dda2f91f60705355cab06b0145bf", "versions.yml:md5,dac5a146f64564be62294d29fe21cb67" @@ -172,9 +176,9 @@ } ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.3" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - "timestamp": "2025-01-21T11:08:18.733645178" + "timestamp": "2025-08-11T15:32:16.596084438" } } \ No newline at end of file diff --git a/workflows/sampletracking.nf b/workflows/sampletracking.nf index 53bb9ad..803542f 100644 --- a/workflows/sampletracking.nf +++ b/workflows/sampletracking.nf @@ -14,6 +14,9 @@ include { samplesheetToList } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_sampletracking_pipeline' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_SNP_BAM } from '../modules/nf-core/samtools/index' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -40,12 +43,74 @@ workflow SAMPLETRACKING { def ch_multiqc_files = Channel.empty() def 
ch_pool_multiqc_files = Channel.empty()
+    // Split every samplesheet row into its sample-BAM part, SNP-BAM part and remaining SNP FASTQ part.
+    def (ch_sample, ch_snp, ch_rest) = ch_samplesheet
+        .multiMap {meta, sample_bam, sample_bam_index, snp_fastq, snp_bam, snp_bam_index ->
+            sample: [meta, sample_bam, sample_bam_index]
+            snp   : [meta, snp_bam, snp_bam_index]
+            rest  : [meta, snp_fastq]
+        }
+    // Multi-channel outputs are bound positionally, so the names on the left must follow the label order.
+    def (ch_sample_with_idx, ch_sample_no_idx) = ch_sample.branch {
+        _meta, _sample_bam, sample_bam_index ->
+        with_index: sample_bam_index
+        no_index  : !sample_bam_index
+    }
+    // Only sample BAM/CRAM files that came without an index are sent to samtools index.
+    SAMTOOLS_INDEX(
+        ch_sample_no_idx.map { meta, sample_bam, _sample_bam_index -> [meta, sample_bam] }
+    )
+    // The module emits exactly one of bai/csi/crai per file; merge them into one index channel.
+    def ch_sample_idx_all = SAMTOOLS_INDEX.out.bai
+        .mix(SAMTOOLS_INDEX.out.csi)
+        .mix(SAMTOOLS_INDEX.out.crai)
+    // Re-attach the freshly built index (joined on meta) and merge with rows that already had one.
+    def ch_sample_fixed = ch_sample_with_idx.mix(
+        ch_sample_no_idx
+            .join(ch_sample_idx_all, by: 0)
+            .map { meta, sample_bam, _old_index, new_index -> [meta, sample_bam, new_index] }
+    )
+    // Destructure in label declaration order (none, with_index, no_index); any other order swaps the channels.
+    def (ch_snp_none, ch_snp_with_idx, ch_snp_no_idx) = ch_snp.branch {
+        _meta, snp_bam, snp_bam_index ->
+        none      : !snp_bam
+        with_index: snp_bam_index
+        no_index  : !snp_bam_index
+    }
+    // Only SNP BAM/CRAM files that exist but lack an index are sent to samtools index.
+    SAMTOOLS_INDEX_SNP_BAM(
+        ch_snp_no_idx.map { meta, snp_bam, _snp_bam_index -> [meta, snp_bam] }
+    )
+
+    def ch_snp_idx_all = SAMTOOLS_INDEX_SNP_BAM.out.bai
+        .mix(SAMTOOLS_INDEX_SNP_BAM.out.csi)
+        .mix(SAMTOOLS_INDEX_SNP_BAM.out.crai)
+    // Rows without any SNP BAM pass through unchanged so the final join below still finds them.
+    def ch_snp_fixed = ch_snp_with_idx.mix(
+        ch_snp_no_idx
+            .join(ch_snp_idx_all, by: 0)
+            .map { meta, snp_bam, _old_index, new_index -> [meta, snp_bam, new_index] }
+    ).mix(
+        ch_snp_none
+    )
+    // Rebuild rows in the original samplesheet column order expected by the steps below.
+    def ch_samplesheet_fixed = ch_rest
+        .join(ch_sample_fixed, by: 0)
+        .join(ch_snp_fixed, by: 0)
+        .map { meta, snp_fastq, sample_bam, sample_bam_index, snp_bam, snp_bam_index ->
+            [meta, sample_bam, sample_bam_index, snp_fastq, snp_bam, snp_bam_index]
+        }
+
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_SNP_BAM.out.versions.first())
+
+
     //
     // Crosscheck fingerprints
     //
 
     def ch_crosscheck_metrics_out =
Channel.empty() - ch_samplesheet + ch_samplesheet_fixed .filter { meta, _sample_bam, _sample_bam_index, snp_fastq, snp_bam, _snp_bam_index -> if(!snp_bam && !snp_fastq) { log.warn("No SNP BAM/CRAM/FASTQ files were detected for '${meta.id}'. Skipping the crosscheck fingerprints step for this sample.") @@ -61,11 +126,12 @@ workflow SAMPLETRACKING { } .set{ ch_inputs } - ch_inputs.to_align.multiMap{ meta, sample_bam, sample_bam_index, snp_fastq -> - fastq: [meta, snp_fastq] - bam: [meta, sample_bam, sample_bam_index] - } - .set{ ch_to_align } + ch_inputs.to_align.multiMap { meta, sample_bam, sample_bam_index, snp_fastq -> + fastq: [meta, snp_fastq] + bam: [meta, sample_bam, sample_bam_index] + } + .set { ch_to_align } + BWA_MEM( ch_to_align.fastq, @@ -90,6 +156,7 @@ workflow SAMPLETRACKING { .dump(tag: "Samples to fingerprint", pretty: true) .set{ch_to_fingerprint} + PICARD_CROSSCHECKFINGERPRINTS( ch_to_fingerprint, ch_fasta_fai @@ -102,9 +169,8 @@ workflow SAMPLETRACKING { // // Determine sample sex // - def ch_sex_prediction_out = Channel.empty() - ch_samplesheet + ch_samplesheet_fixed .map { meta, sample_bam, sample_bam_index, _snp_fastq, _snp_bam, _snp_bam_index -> [ meta, sample_bam, sample_bam_index ] } @@ -191,7 +257,6 @@ workflow SAMPLETRACKING { ch_pool_multiqc_files = ch_pool_multiqc_files.mix(ch_sex_prediction_configs) - // // Collate and save software versions //