Build branch main with version main (4adb388)

Build pipeline: viash-hub.rnaseq.main-c9pqm

Source commit: 4adb388c7d

Source message: Merge pull request #29 from viash-hub/add-labels

Add labels (and then some)
This commit is contained in:
CI
2024-12-05 08:52:37 +00:00
parent b649544267
commit d69226987e
371 changed files with 11102 additions and 6497 deletions

View File

@@ -448,6 +448,9 @@ resources:
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "A viash sub-workflow for genome alignment and quantification stage of\
\ nf-core/rnaseq pipeline.\n"
info: null
@@ -578,6 +581,8 @@ runners:
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
@@ -590,8 +595,8 @@ build_info:
output: "target/executable/workflows/genome_alignment_and_quant"
executable: "target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant"
viash_version: "0.9.0"
git_commit: "d63250ae704b6bd9a11792c4f979be2309999a2e"
git_remote: "https://x-access-token:ghs_ri9q8QB7jMaviilgc8bYrtSqMrabrg3q8JZY@github.com/viash-hub/rnaseq"
git_commit: "4adb388c7d928eea02bccf890214f3b932b08714"
git_remote: "https://x-access-token:ghs_MW7eJel8BXirGBmM5nVBbQvzqvrjaH0zDh8H@github.com/viash-hub/rnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads"
- "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort"
@@ -623,8 +628,10 @@ package_config:
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\
\ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"

View File

@@ -1326,86 +1326,92 @@ workflow run_wf {
[ id, state + [ paired: paired, input: input ] ]
}
| star_align_reads.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "fastq_1",
"input_r2": "fastq_2",
"genome_dir": "star_index",
"sjdb_gtf_file": "gtf",
"out_sam_attr_rg_line": "star_sam_attr_rg_line",
"sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon"
],
toState: [
"genome_bam": "aligned_reads",
"transcriptome_bam": "reads_aligned_to_transcriptome",
"star_multiqc": "log"
],
args: [
quant_mode: "TranscriptomeSAM",
twopass_mode: "Basic",
out_sam_type: "BAM;Unsorted",
run_rng_seed: 0,
out_filter_multimap_nmax: 20,
align_sjdb_overhang_min: 1,
out_sam_attributes: "NH;HI;AS;NM;MD",
quant_transcriptome_sam_output: "BanSingleEnd"
]
| star_align_reads.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "fastq_1",
"input_r2": "fastq_2",
"genome_dir": "star_index",
"sjdb_gtf_file": "gtf",
"out_sam_attr_rg_line": "star_sam_attr_rg_line",
"sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon"
],
toState: [
"genome_bam": "aligned_reads",
"transcriptome_bam": "reads_aligned_to_transcriptome",
"star_multiqc": "log"
],
args: [
quant_mode: "TranscriptomeSAM",
twopass_mode: "Basic",
out_sam_type: "BAM;Unsorted",
run_rng_seed: 0,
out_filter_multimap_nmax: 20,
align_sjdb_overhang_min: 1,
out_sam_attributes: "NH;HI;AS;NM;MD",
quant_transcriptome_sam_output: "BanSingleEnd"
],
directives: [ label: [ "highmem", "midcpu" ] ]
)
// GENOME BAM
| samtools_sort.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: ["input": "genome_bam"],
toState: ["genome_bam_sorted": "output"],
key: "genome_sorted"
| samtools_sort.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: ["input": "genome_bam"],
toState: ["genome_bam_sorted": "output"],
key: "genome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_index.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
"csi": "bam_csi_index"
],
toState: [ "genome_bam_index": "output" ],
key: "genome_sorted"
| samtools_index.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
"csi": "bam_csi_index"
],
toState: [ "genome_bam_index": "output" ],
key: "genome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_stats.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_stats": "output" ],
key: "genome_stats"
| samtools_stats.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_stats": "output" ],
key: "genome_stats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_flagstat.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_flagstat": "output" ],
key: "genome_flagstat"
| samtools_flagstat.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_flagstat": "output" ],
key: "genome_flagstat",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_idxstats.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_idxstats": "output" ],
key: "genome_idxstats"
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
"bai": "genome_bam_index",
"fasta": "fasta"
],
toState: [ "genome_bam_idxstats": "output" ],
key: "genome_idxstats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
//
// Remove duplicate reads from BAM file based on UMIs
//
// Deduplicate genome BAM file
| umi_tools_dedup.run (
//
// Remove duplicate reads from BAM file based on UMIs
//
// Deduplicate genome BAM file
| umi_tools_dedup.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: { id, state ->
def output_stats = state.umi_dedup_stats ? state.id :
@@ -1415,18 +1421,20 @@ workflow run_wf {
output_stats: output_stats]
},
toState: [ "genome_bam_sorted": "output" ],
key: "genome_deduped"
)
| samtools_index.run (
key: "genome_deduped",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_index.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
"csi": "bam_csi_index"
],
toState: [ "genome_bam_index": "output" ],
key: "genome_deduped"
)
| samtools_stats.run (
key: "genome_deduped",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_stats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "genome_bam_sorted",
@@ -1434,9 +1442,10 @@ workflow run_wf {
"fasta": "fasta"
],
toState: [ "genome_bam_stats": "output" ],
key: "genome_deduped_stats"
)
| samtools_flagstat.run (
key: "genome_deduped_stats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_flagstat.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
@@ -1444,9 +1453,10 @@ workflow run_wf {
"fasta": "fasta"
],
toState: [ "genome_bam_flagstat": "output" ],
key: "genome_deduped_flagstat"
)
| samtools_idxstats.run(
key: "genome_deduped_flagstat",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_idxstats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "genome_bam_sorted",
@@ -1454,55 +1464,61 @@ workflow run_wf {
"fasta": "fasta",
],
toState: [ "genome_bam_idxstats": "output" ],
key: "genome_deduped_idxstats"
)
key: "genome_deduped_idxstats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
// Deduplicate transcriptome BAM file
| samtools_sort.run (
| samtools_sort.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [ "input": "transcriptome_bam" ],
toState: [ "transcriptome_bam": "output" ],
key: "transcriptome_sorted"
)
| samtools_index.run (
key: "transcriptome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_index.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "transcriptome_bam",
"csi": "bam_csi_index"
],
toState: [ "transcriptome_bam_index": "output" ],
key: "transcriptome_sorted"
)
| samtools_stats.run (
key: "transcriptome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_stats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "transcriptome_bam",
"bai": "transcriptome_bam_index",
],
toState: [ "transcriptome_bam_stats": "output" ],
key: "transcriptome_stats"
)
| samtools_flagstat.run (
key: "transcriptome_stats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_flagstat.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "transcriptome_bam",
"bai": "transcriptome_bam_index"
],
toState: [ "transcriptome_bam_flagstat": "output" ],
key: "transcriptome_flagstat"
)
| samtools_idxstats.run(
key: "transcriptome_flagstat",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_idxstats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "transcriptome_bam",
"bai": "transcriptome_bam_index"
],
toState: [ "transcriptome_bam_idxstats": "output" ],
key: "transcriptome_idxstats"
)
| umi_tools_dedup.run (
key: "transcriptome_idxstats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| umi_tools_dedup.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: { id, state ->
def output_stats = state.umi_dedup_stats ? state.id :
@@ -1512,57 +1528,64 @@ workflow run_wf {
output_stats: output_stats]
},
toState: [ "transcriptome_bam_deduped": "output" ],
key: "transcriptome_deduped"
)
| samtools_sort.run (
key: "transcriptome_deduped",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_sort.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [ "input": "transcriptome_bam_deduped" ],
toState: [ "transcriptome_bam": "output" ],
key: "transcriptome_deduped_sorted"
)
| samtools_index.run (
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "transcriptome_bam",
"csi": "bam_csi_index"
],
toState: [ "transcriptome_bam_index": "output" ],
key: "transcriptome_deduped_sorted"
)
| samtools_stats.run (
key: "transcriptome_deduped_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_index.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "transcriptome_bam",
"csi": "bam_csi_index"
],
toState: [ "transcriptome_bam_index": "output" ],
key: "transcriptome_deduped_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_stats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"input": "transcriptome_bam",
"bai": "transcriptome_bam_index"
],
toState: [ "transcriptome_bam_stats": "output" ],
key: "transcriptome_deduped_stats"
)
| samtools_flagstat.run (
key: "transcriptome_deduped_stats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_flagstat.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "transcriptome_bam",
"bai": "transcriptome_bam_index"
],
toState: [ "transcriptome_bam_flagstat": "output" ],
key: "transcriptome_deduped_flagstat"
)
| samtools_idxstats.run(
key: "transcriptome_deduped_flagstat",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_idxstats.run(
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"bam": "transcriptome_bam",
"bai": "transcriptome_bam_index"
],
toState: [ "transcriptome_bam_idxstats": "output" ],
key: "transcriptome_deduped_idxstats"
)
key: "transcriptome_deduped_idxstats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
// Fix paired-end reads in name sorted BAM file
| umi_tools_prepareforrsem.run (
// Fix paired-end reads in name sorted BAM file
| umi_tools_prepareforrsem.run(
runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' },
fromState: [ "input": "transcriptome_bam" ],
toState: [ "transcriptome_bam": "output" ]
)
toState: [ "transcriptome_bam": "output" ],
directives: [ label: [ "highmem", "midcpu" ] ]
)
// Infer lib-type for salmon quant
| map { id, state ->
@@ -1585,26 +1608,27 @@ workflow run_wf {
}
// Count reads from BAM alignments using Salmon
| salmon_quant.run (
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"lib_type": "lib_type",
"alignments": "transcriptome_bam",
"targets": "transcript_fasta",
"gene_map": "gtf"
],
toState: [
"quant_out_dir": "output",
"quant_results_file": "quant_results"
]
| salmon_quant.run(
runIf: { id, state -> state.aligner == 'star_salmon' },
fromState: [
"lib_type": "lib_type",
"alignments": "transcriptome_bam",
"targets": "transcript_fasta",
"gene_map": "gtf"
],
toState: [
"quant_out_dir": "output",
"quant_results_file": "quant_results"
],
directives: [ label: [ "highmem", "midcpu" ] ]
)
| map { id, state ->
def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state
[ id, mod_state ]
}
| rsem_calculate_expression.run (
| map { id, state ->
def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state
[ id, mod_state ]
}
| rsem_calculate_expression.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
"id": "id",
@@ -1635,26 +1659,29 @@ workflow run_wf {
"bam_star_rsem": "bam_star",
"bam_genome_rsem": "bam_genome",
"bam_transcript_rsem": "bam_transcript"
]
)
// RSEM_Star BAM
| samtools_sort.run (
],
directives: [ label: [ "highmem", "midcpu" ] ]
)
// RSEM_Star BAM
| samtools_sort.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: ["input": "bam_star_rsem"],
toState: ["genome_bam_sorted": "output"],
key: "genome_sorted"
)
| samtools_index.run (
key: "genome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_index.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
"input": "genome_bam_sorted",
"csi": "bam_csi_index"
],
toState: [ "genome_bam_index": "output" ],
key: "genome_sorted"
)
| samtools_stats.run (
key: "genome_sorted",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_stats.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
"input": "genome_bam_sorted",
@@ -1662,9 +1689,10 @@ workflow run_wf {
"fasta": "fasta"
],
toState: [ "genome_bam_stats": "output" ],
key: "genome_stats"
)
| samtools_flagstat.run (
key: "genome_stats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_flagstat.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
"bam": "genome_bam_sorted",
@@ -1672,9 +1700,10 @@ workflow run_wf {
"fasta": "fasta"
],
toState: [ "genome_bam_flagstat": "output" ],
key: "genome_flagstat"
)
| samtools_idxstats.run(
key: "genome_flagstat",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| samtools_idxstats.run(
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
"bam": "genome_bam_sorted",
@@ -1682,9 +1711,9 @@ workflow run_wf {
"fasta": "fasta"
],
toState: [ "genome_bam_idxstats": "output" ],
key: "genome_idxstats"
)
key: "genome_idxstats",
directives: [ label: [ "highmem", "midcpu" ] ]
)
| map { id, state ->
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
[ id, mod_state ]
@@ -1709,9 +1738,10 @@ workflow run_wf {
"rsem_counts_gene": "rsem_counts_gene",
"rsem_counts_transcripts": "rsem_counts_transcripts",
"bam_genome_rsem": "bam_genome_rsem",
"bam_transcript_rsem": "bam_transcript_rsem" ]
"bam_transcript_rsem": "bam_transcript_rsem"
]
)
emit:
output_ch
}

View File

@@ -0,0 +1,44 @@
process {
  // Baseline for lightweight components: one CPU and 2 GB of memory,
  // with the memory request multiplied by the attempt number on retries.
  cpus = 1
  memory = { 2.GB * task.attempt }

  // Exit statuses 137 through 140 are treated as memory-related and retried
  // (at most `maxRetries` times); any other failure terminates the run.
  maxRetries = 3
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }

  // Optional upper bound consulted by get_memory(); null means "no cap".
  maxMemory = null

  // CPU labels
  withLabel: singlecpu {
    cpus = 1
  }
  withLabel: lowcpu {
    cpus = 2
  }
  withLabel: midcpu {
    cpus = 4
  }
  withLabel: highcpu {
    cpus = 8
  }
  withLabel: veryhighcpu {
    cpus = 16
  }

  // Memory labels: each request grows with the attempt number and is passed
  // through get_memory() before being used.
  withLabel: lowmem {
    memory = { get_memory( 4.GB * task.attempt ) }
  }
  withLabel: midmem {
    memory = { get_memory( 16.GB * task.attempt ) }
  }
  withLabel: highmem {
    memory = { get_memory( 24.GB * task.attempt ) }
  }
  withLabel: veryhighmem {
    memory = { get_memory( 48.GB * task.attempt ) }
  }
}
// Clamp a requested memory amount to the optional `process.maxMemory` cap.
//
// to_compare: the requested amount (the callers in this file pass values
//             such as `4.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when no `process.maxMemory` is configured;
//   - the cap itself when the current attempt equals `process.maxRetries`;
//   - the cap when `to_compare` exceeds it;
//   - `to_compare` otherwise.
// If the cap or the retry count cannot be interpreted, an error is printed
// and the JVM exits with status 1.
def get_memory(to_compare) {
  // No cap configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // Coerce once so both branches below return the same type and an invalid
    // cap is reported through the catch block instead of leaking downstream.
    def limit = process.maxMemory as nextflow.util.MemoryUnit

    // NOTE(review): `task` is not a parameter of this function; confirm that it
    // resolves in the context where Nextflow evaluates the calling closure.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return limit
    }

    // Previously this branch returned the undefined name `max_memory`, which
    // threw and aborted the run whenever a request exceeded the cap.
    // `> 0` is used because compareTo only guarantees the sign of its result.
    if (to_compare.compareTo(limit) > 0) {
      return limit
    }

    return to_compare
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}