diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index d4f66e4..0ac2fb1 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,8 +1,8 @@
report_comment: >
- This report has been generated by the
+ This report has been generated by the
analysis pipeline.
report_section_order:
- "rnaseq.vsh-methods-description":
+ "rnaseq-methods-description":
order: -1000
software_versions:
order: -1001
@@ -10,6 +10,7 @@ report_section_order:
order: -1002
export_plots: true
+disable_version_detection: true
# Run only these modules
run_modules:
@@ -19,7 +20,6 @@ run_modules:
- fastp
- sortmerna
- star
- # - hisat2
- rsem
- salmon
- kallisto
@@ -64,17 +64,15 @@ table_columns_visible:
fastqc:
percent_duplicates: False
-extra_fn_clean_exts:
- - ".salmon_quant"
- - ".mapping_quality"
- - ".genome_sorted"
- - ".MarkDuplicates"
- - ".MarkDuplicates_flagstat"
- - ".MarkDuplicates_stats"
- - ".genome_sorted_MarkDuplicates"
- - ".star_aligned"
+extra_fn_clean_exts:  # MultiQC option name; a misspelled key is silently ignored
+ # - ".mapping_quality"
+ # - ".MarkDuplicates_flagstat.output.flagstat"
+ # - ".MarkDuplicates_idxstats.output.idxstats"
+ # - ".MarkDuplicates_stats.output.txt"
+ # - ".genome_sorted_MarkDuplicates.output.bam"
+ # - ".genome_sorted_MarkDuplicates"
- ".read_1"
- - ".read_2"
+ - ".read_2"
# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml
custom_data:
@@ -117,32 +115,31 @@ sp:
fn: "*.fastqc.zip"
cutadapt:
- fn: "*.trimming_report.txt"
+ fn: "*.trimming_report*.txt"
fastp:
- fn: "*.fastp.json"
+ fn: "*.fastp_out.json"
sortmerna:
fn: "*sortmerna*.log"
star:
- fn: "*.star_aligned.log.final.out"
+ fn: "*.star_align_reads.log.txt"
# hisat2:
# fn: "*.hisat2.summary.log"
-
- salmon/meta:
- fn: "*meta_info.json"
+ # salmon:
+ # fn: "*meta_info.json"
preseq:
fn: "*.lc_extrap.txt"
samtools/stats:
- fn: "*.stats"
+ fn: "*_stats.output.txt"
samtools/flagstat:
- fn: "*.flagstat"
+ fn: "*_flagstat.output.flagstat"
samtools/idxstats:
- fn: "*.idxstats*"
+ fn: "*_idxstats.output.idxstats"
rseqc/bam_stat:
fn: "*.mapping_quality.txt"
diff --git a/src/cat_fastq/config.vsh.yaml b/src/cat_fastq/config.vsh.yaml
index 7adc2ef..4f8b77b 100644
--- a/src/cat_fastq/config.vsh.yaml
+++ b/src/cat_fastq/config.vsh.yaml
@@ -25,13 +25,13 @@ argument_groups:
- name: "--fastq_1"
type: file
direction: output
- default: $id.read_1.merged.fastq
+ default: $id_r1.fastq
description: Concatenated read 1 fastq
- name: "--fastq_2"
type: file
direction: output
must_exist: false
- default: $id.read_2.merged.fastq
+ default: $id_r2.fastq
description: Concatenated read 2 fastq
resources:
diff --git a/src/fastqc/script.sh b/src/fastqc/script.sh
index 3b9461b..808d300 100644
--- a/src/fastqc/script.sh
+++ b/src/fastqc/script.sh
@@ -30,10 +30,12 @@ fastqc -o $tmpdir ${input[*]}
file1=$(basename -- "${input[0]}")
read1="${file1%.fastq*}"
-file2=$(basename -- "${input[1]}")
-read2="${file2%.fastq*}"
-
[[ -e "${tmpdir}/${read1}_fastqc.html" ]] && cp "${tmpdir}/${read1}_fastqc.html" $par_fastqc_html_1
-[[ -e "${tmpdir}/${read2}_fastqc.html" ]] && cp "${tmpdir}/${read2}_fastqc.html" $par_fastqc_html_2
[[ -e "${tmpdir}/${read1}_fastqc.zip" ]] && cp "${tmpdir}/${read1}_fastqc.zip" $par_fastqc_zip_1
-[[ -e "${tmpdir}/${read2}_fastqc.zip" ]] && cp "${tmpdir}/${read2}_fastqc.zip" $par_fastqc_zip_2
+
+if [[ "$par_paired" == "true" ]]; then  # compare explicitly: never execute the value, and treat unset as single-end
+ file2=$(basename -- "${input[1]}")
+ read2="${file2%.fastq*}"
+ [[ -e "${tmpdir}/${read2}_fastqc.html" ]] && cp "${tmpdir}/${read2}_fastqc.html" $par_fastqc_html_2
+ [[ -e "${tmpdir}/${read2}_fastqc.zip" ]] && cp "${tmpdir}/${read2}_fastqc.zip" $par_fastqc_zip_2
+fi
\ No newline at end of file
diff --git a/src/kallisto/kallisto_quant/script.sh b/src/kallisto/kallisto_quant/script.sh
index 6f19015..cba09f4 100644
--- a/src/kallisto/kallisto_quant/script.sh
+++ b/src/kallisto/kallisto_quant/script.sh
@@ -23,16 +23,7 @@ if [[ "$par_extra_args" != *"--fr-stranded"* ]] && [[ "$par_extra_args" != *"--r
fi
mkdir -p $par_output
-echo "kallisto quant \
- ${meta_cpus:+--threads $meta_cpus} \
- --index $par_index \
- ${par_gtf:+--gtf $par_gtf} \
- ${par_chromosomes:+--chromosomes $par_chromosomes} \
- $single_end_params \
- $strandedness \
- $par_extra_args \
- -o $par_output \
- ${input[*]} 2> >(tee -a ${par_output}/kallisto_quant.log >&2)"
+
kallisto quant \
${meta_cpus:+--threads $meta_cpus} \
--index $par_index \
diff --git a/src/prepare_multiqc_input/script.sh b/src/prepare_multiqc_input/script.sh
index fef1f73..daa63e2 100644
--- a/src/prepare_multiqc_input/script.sh
+++ b/src/prepare_multiqc_input/script.sh
@@ -24,6 +24,8 @@ IFS="," read -ra rsem_multiqc <<< $par_rsem_multiqc && for file in "${rsem_multi
IFS="," read -ra salmon_multiqc <<< $par_salmon_multiqc && for file in "${salmon_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
+IFS="," read -ra pseudo_multiqc <<< $par_pseudo_multiqc && for file in "${pseudo_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
+
IFS="," read -ra samtools_stats <<< $par_samtools_stats && for file in "${samtools_stats[@]}"; do [ -e "$file" ] && cp -r "$file" $par_output/; done
IFS="," read -ra samtools_flagstat <<< $par_samtools_flagstat && for file in "${samtools_flagstat[@]}"; do [ -e "$file" ] && cp -r "$file" $par_output/; done
@@ -32,12 +34,9 @@ IFS="," read -ra samtools_idxstats <<< $par_samtools_idxstats && for file in "${
IFS="," read -ra markduplicates_multiqc <<< $par_markduplicates_multiqc && for file in "${markduplicates_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
-IFS="," read -ra pseudo_multiqc <<< $par_pseudo_multiqc && for file in "${pseudo_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
-
-
IFS="," read -ra featurecounts_multiqc <<< $par_featurecounts_multiqc && for file in "${featurecounts_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
-IFS="," read -ra featurecounts_rrna_multiqc <<< $par_featurecounts_rrna_multiqc&& for file in "${featurecounts_rrna_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
+IFS="," read -ra featurecounts_rrna_multiqc <<< $par_featurecounts_rrna_multiqc && for file in "${featurecounts_rrna_multiqc[@]}"; do [ -e "$file" ] && cp -r "$file" "$par_output/"; done
[ -e "$par_aligner_pca_multiqc" ] && cp -r "$par_aligner_pca_multiqc" "$par_output/"
diff --git a/src/rsem/rsem_calculate_expression/config.vsh.yaml b/src/rsem/rsem_calculate_expression/config.vsh.yaml
index b97997d..439404e 100644
--- a/src/rsem/rsem_calculate_expression/config.vsh.yaml
+++ b/src/rsem/rsem_calculate_expression/config.vsh.yaml
@@ -33,9 +33,6 @@ argument_groups:
- name: "--extra_args"
type: string
description: Extra rsem-calculate-expression arguments in addition to the defaults.
- - name: "--versions"
- type: file
- must_exist: false
- name: "Output"
arguments:
diff --git a/src/rsem/rsem_calculate_expression/script.sh b/src/rsem/rsem_calculate_expression/script.sh
index 27049a6..7b9e89b 100755
--- a/src/rsem/rsem_calculate_expression/script.sh
+++ b/src/rsem/rsem_calculate_expression/script.sh
@@ -19,10 +19,10 @@ fi
IFS="," read -ra input <<< $par_input
-INDEX=`find -L $meta_resources_dir/ -name "*.grp" | sed 's/\.grp$//'`
+INDEX=`find -L $par_index/ -name "*.grp" | sed 's/\.grp$//'`
rsem-calculate-expression \
- ${meta_cpus:+--num-theads $meta_cpus} \
+ ${meta_cpus:+--num-threads $meta_cpus} \
$strandedness \
${par_paired:+--paired-end} \
$par_extra_args \
@@ -30,3 +30,10 @@ rsem-calculate-expression \
$INDEX \
$par_id
+[[ -e "${par_id}.genes.results" ]] && mv "${par_id}.genes.results" $par_counts_gene
+[[ -e "${par_id}id.isoforms.results" ]] && mv "${par_id}id.isoforms.results" $par_counts_transcripts
+[[ -e "${par_id}.stat" ]] && mv -r "${par_id}.stat" $par_stat
+# [[ -e "${par_id}.log" ]] && mv "${par_id}.log" $par_logs
+[[ -e "${par_id}.STAR.genome.bam" ]] && mv "${par_id}.STAR.genome.bam" $par_bam_star
+[[ -e "${par_id}.genome.bam" ]] && mv "${par_id}.genome.bam" $par_bam_genome
+[[ -e "${par_id}.transcript.bam" ]] && mv "${par_id}.transcript.bam" $par_bam_transcript
\ No newline at end of file
diff --git a/src/rsem/rsem_merge_counts/config.vsh.yaml b/src/rsem/rsem_merge_counts/config.vsh.yaml
index 58b713a..a814662 100644
--- a/src/rsem/rsem_merge_counts/config.vsh.yaml
+++ b/src/rsem/rsem_merge_counts/config.vsh.yaml
@@ -18,9 +18,6 @@ argument_groups:
- name: "--counts_transcripts"
type: file
description: Expression counts on transcript level (isoforms)
- - name: "--versions"
- type: file
- must_exist: false
- name: "Output"
arguments:
@@ -44,10 +41,6 @@ argument_groups:
description: File containing transcript TPM across all samples.
default: rsem.merged.transcript_tpm.tsv
direction: output
- - name: "--updated_versions"
- type: file
- default: versions.yml
- direction: output
resources:
- type: bash_script
diff --git a/src/trimgalore/config.vsh.yaml b/src/trimgalore/config.vsh.yaml
index 0c4068d..6e405f0 100644
--- a/src/trimgalore/config.vsh.yaml
+++ b/src/trimgalore/config.vsh.yaml
@@ -232,13 +232,13 @@ argument_groups:
required: false
description: Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
- example: read_1.fastq
+ example: read_1.fastq.gz
- name: --trimmed_r2
type: file
required: false
description: Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
- example: read_2.fastq
+ example: read_2.fastq.gz
- name: --trimming_report_r1
type: file
required: false
diff --git a/src/workflows/genome_alignment_and_quant/config.vsh.yaml b/src/workflows/genome_alignment_and_quant/config.vsh.yaml
index d93b916..7816d39 100644
--- a/src/workflows/genome_alignment_and_quant/config.vsh.yaml
+++ b/src/workflows/genome_alignment_and_quant/config.vsh.yaml
@@ -76,7 +76,7 @@ argument_groups:
type: string
default: 'gene_name'
description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.
- - name: extra_rsem_calculate_expression_args
+ - name: --extra_rsem_calculate_expression_args
type: string
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
- name: "--aligner"
@@ -96,7 +96,7 @@ argument_groups:
- name: "--star_multiqc"
type: file
direction: output
- default: $id.star_align.log
+ default: $id_star.log
- name: "--genome_bam_sorted"
type: file
direction: output
@@ -145,7 +145,40 @@ argument_groups:
type: file
direction: output
default: $id.quant.sf
-
+ - name: "--salmon_multiqc"
+ type: file
+ direction: output
+ - name: "--rsem_counts_gene"
+ type: file
+ description: Expression counts on gene level
+ default: $id.genes.results
+ direction: output
+ - name: "--rsem_counts_transcripts"  # must match the rsem_counts_transcripts key the workflow emits in its final state
+ type: file
+ description: Expression counts on transcript level
+ default: $id.isoforms.results
+ direction: output
+ - name: "--rsem_multiqc"
+ type: file
+ description: RSEM statistics
+ default: $id.stat
+ direction: output
+ - name: "--bam_star_rsem"
+ type: file
+ description: BAM file generated by STAR (optional)
+ default: $id.STAR.genome.bam
+ direction: output
+ - name: "--bam_genome_rsem"
+ type: file
+ description: Genome BAM file (optional)
+ default: $id.genome.bam
+ direction: output
+ - name: "--bam_transcript_rsem"
+ type: file
+ description: Transcript BAM file (optional)
+ default: $id.transcript.bam
+ direction: output
+
resources:
- type: nextflow_script
path: main.nf
@@ -165,9 +198,11 @@ dependencies:
- name: samtools/samtools_idxstats
repository: biobox
- name: umitools/umitools_dedup
+ # - name: umi_tools/umi_tools_dedup
# repository: biobox
- name: umitools_prepareforquant
- # repository: biobox
+ # - name: umi_tools/umi_tools_prepareforquant
+ # repository: biobox
- name: salmon/salmon_quant
repository: biobox
- name: rsem/rsem_calculate_expression
diff --git a/src/workflows/genome_alignment_and_quant/main.nf b/src/workflows/genome_alignment_and_quant/main.nf
index db456c4..4734670 100644
--- a/src/workflows/genome_alignment_and_quant/main.nf
+++ b/src/workflows/genome_alignment_and_quant/main.nf
@@ -280,6 +280,11 @@ workflow run_wf {
]
)
+ | map { id, state ->
+ def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state
+ [ id, mod_state ]
+ }
+
| rsem_calculate_expression.run (
runIf: { id, state -> state.aligner == 'star_rsem' },
fromState: [
@@ -300,7 +305,7 @@ workflow run_wf {
"bam_transcript_rsem": "bam_transcript"
]
)
-
+
// RSEM_Star BAM
| samtools_sort.run (
runIf: { id, state -> state.aligner == 'star_rsem' },
@@ -357,6 +362,7 @@ workflow run_wf {
[ "star_alignment": "star_alignment",
"star_multiqc": "star_multiqc",
"rsem_multiqc": "rsem_multiqc",
+ "salmon_multiqc": "salmon_multiqc",
"genome_bam_sorted": "genome_bam_sorted",
"genome_bam_index": "genome_bam_index",
"genome_bam_stats": "genome_bam_stats",
@@ -368,7 +374,11 @@ workflow run_wf {
"transcriptome_bam_flagstat": "transcriptome_bam_flagstat",
"transcriptome_bam_idxstats": "transcriptome_bam_idxstats",
"quant_out_dir": "quant_out_dir",
- "quant_results_file": "quant_results_file" ]
+ "quant_results_file": "quant_results_file",
+ "rsem_counts_gene": "rsem_counts_gene",
+ "rsem_counts_transcripts": "rsem_counts_transcripts",
+ "bam_genome_rsem": "bam_genome_rsem",
+ "bam_transcript_rsem": "bam_transcript_rsem" ]
)
emit:
diff --git a/src/workflows/genome_alignment_and_quant/test_run.sh b/src/workflows/genome_alignment_and_quant/test_run.sh
index d0c8a2b..c48d707 100755
--- a/src/workflows/genome_alignment_and_quant/test_run.sh
+++ b/src/workflows/genome_alignment_and_quant/test_run.sh
@@ -1,25 +1,43 @@
#!/bin/bash
-viash ns build --setup cb --parallel
+# viash ns build --setup cb --parallel
# Split error message from standard output
# viash ns list > /dev/null
-CURR=`pwd`
+echo "> Preparing reference data files"
+gunzip --keep testData/minimal_test/reference/genes.gtf.gz
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
id,fastq_1,fastq_2,strandedness
WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse
-WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,reverse
+RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
HERE
+# echo "> Test 1: STAR Salmon"
+# nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \
+# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
+# --publish_dir test_results/genome_alignment_test1 \
+# --fasta testData/minimal_test/reference/genome.fasta \
+# --gtf testData/minimal_test/reference/genes.gtf \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --star_index test_results/output_test1/STAR_index \
+# --aligner star_salmon \
+# -profile docker \
+# -resume
+
+echo "> Test 2: STAR RSEM"
nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
- --publish_dir "test_results/genome_alignment_test" \
+ --publish_dir test_results/genome_alignment_test2 \
--fasta testData/minimal_test/reference/genome.fasta \
- --gtf testData/minimal_test/reference/genes.gtf.gz \
+ --gtf testData/minimal_test/reference/genes.gtf \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
- --star_index testData/test_output/star_index \
- --aligner "star_rsem" \
+ --rsem_index test_results/output_test1/RSEM_index \
+ --aligner star_rsem \
+ --extra_rsem_calculate_expression_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
-profile docker \
- # -resume
\ No newline at end of file
+ -resume
+
+echo "Removing reference data files"
+rm testData/minimal_test/reference/genes.gtf
diff --git a/src/workflows/post_processing/config.vsh.yaml b/src/workflows/post_processing/config.vsh.yaml
index 50b52e9..40ca733 100644
--- a/src/workflows/post_processing/config.vsh.yaml
+++ b/src/workflows/post_processing/config.vsh.yaml
@@ -79,27 +79,27 @@ argument_groups:
- name: "--processed_genome_bam"
type: file
direction: output
- default: $id.markdup.sorted.bam
+ default: $id.genome.bam
- name: "--genome_bam_index"
type: file
direction: output
- default: $id.markdup.sorted.bam
+ default: $id.genome.bam.bai
- name: "--genome_bam_stats"
type: file
direction: output
- default: $id.markdup.sorted.bam.stats
+ default: $id.genome.stats
- name: "--genome_bam_flagstat"
type: file
direction: output
- default: $id.markdup.sorted.bam.flagstat
+ default: $id.genome.flagstat
- name: "--genome_bam_idxstats"
type: file
direction: output
- default: $id.markdup.sorted.bam.idxstats
+ default: $id.genome.idxstats
- name: "--markduplicates_metrics"
type: file
direction: output
- default: $id.markdup.sorted.MarkDuplicates.metrics.txt
+ default: $id.MarkDuplicates.metrics.txt
- name: "--stringtie_transcript_gtf"
type: file
direction: output
@@ -151,6 +151,8 @@ dependencies:
- name: samtools/samtools_idxstats
repository: biobox
- name: stringtie
+ # - name: bedtools/bedtools_genomecov
+ # repository: biobox
- name: bedtools_genomecov
- name: ucsc/bedclip
- name: ucsc/bedgraphtobigwig
diff --git a/src/workflows/post_processing/main.nf b/src/workflows/post_processing/main.nf
index 80b15e0..541ef71 100644
--- a/src/workflows/post_processing/main.nf
+++ b/src/workflows/post_processing/main.nf
@@ -16,20 +16,20 @@ workflow run_wf {
"extra_picard_args": "extra_picard_args"
],
toState: [
- "genome_bam": "output_bam",
+ "processed_genome_bam": "output_bam",
"markduplicates_metrics": "metrics"
]
)
| samtools_sort.run (
runIf: { id, state -> !state.skip_markduplicates && !state.with_umi },
- fromState: [ "input": "genome_bam" ],
- toState: [ "genome_bam": "output" ],
+ fromState: [ "input": "processed_genome_bam" ],
+ toState: [ "processed_genome_bam": "output" ],
key: "genome_sorted_MarkDuplicates"
)
| samtools_index.run (
runIf: { id, state -> !state.skip_markduplicates && !state.with_umi },
fromState: [
- "input": "genome_bam",
+ "input": "processed_genome_bam",
"csi": "bam_csi_index"
],
toState: [ "genome_bam_index": "output" ],
@@ -38,7 +38,7 @@ workflow run_wf {
| samtools_stats.run (
runIf: { id, state -> !state.skip_markduplicates && !state.with_umi },
fromState: [
- "input": "genome_bam",
+ "input": "processed_genome_bam",
"bai": "genome_bam_index"
],
toState: [ "genome_bam_stats": "output" ],
@@ -47,7 +47,7 @@ workflow run_wf {
| samtools_flagstat.run (
runIf: { id, state -> !state.skip_markduplicates && !state.with_umi },
fromState: [
- "bam": "genome_bam",
+ "bam": "processed_genome_bam",
"bai": "genome_bam_index"
],
toState: [ "genome_bam_flagstat": "output" ],
@@ -56,7 +56,7 @@ workflow run_wf {
| samtools_idxstats.run(
runIf: { id, state -> !state.skip_markduplicates && !state.with_umi },
fromState: [
- "bam": "genome_bam",
+ "bam": "processed_genome_bam",
"bai": "genome_bam_index"
],
toState: [ "genome_bam_idxstats": "output" ],
@@ -67,7 +67,7 @@ workflow run_wf {
runIf: { id, state -> !state.skip_stringtie },
fromState: [
"strandedness": "strandedness",
- "bam": "genome_bam",
+ "bam": "processed_genome_bam",
"annotation_gtf": "gtf",
"extra_stringtie_args": "extra_stringtie_args"
],
@@ -85,7 +85,7 @@ workflow run_wf {
runIf: { id, state -> !state.skip_bigwig },
fromState: [
"strandedness": "strandedness",
- "bam": "genome_bam",
+ "bam": "processed_genome_bam",
"extra_bedtools_args": "extra_bedtools_args"
],
toState: [
@@ -140,7 +140,7 @@ workflow run_wf {
}
| setState (
- "processed_genome_bam": "genome_bam",
+ "processed_genome_bam": "processed_genome_bam",
"genome_bam_index": "genome_bam_index",
"genome_bam_stats": "genome_bam_stats",
"genome_bam_flagstat": "genome_bam_flagstat",
diff --git a/src/workflows/post_processing/test_run.sh b/src/workflows/post_processing/test_run.sh
index 8acd71d..d62aee2 100755
--- a/src/workflows/post_processing/test_run.sh
+++ b/src/workflows/post_processing/test_run.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-viash ns build --setup cb --parallel
+# viash ns build --setup cb --parallel
nextflow run target/nextflow/workflows/post_processing/main.nf \
--publish_dir "testData/paired_end_test" \
diff --git a/src/workflows/pre_processing/config.vsh.yaml b/src/workflows/pre_processing/config.vsh.yaml
index b4868d1..b26fde6 100644
--- a/src/workflows/pre_processing/config.vsh.yaml
+++ b/src/workflows/pre_processing/config.vsh.yaml
@@ -139,13 +139,13 @@ argument_groups:
- name: "Read filtering options"
arguments:
- name: "--skip_bbsplit"
- type: boolean
+ type: boolean_true
description: Skip BBSplit for removal of non-reference genome reads.
- default: true
+ # default: true
- name: "--remove_ribo_rna"
- type: boolean
+ type: boolean_true
description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA.
- default: false
+ # default: false
- name: "Other options"
arguments:
@@ -162,14 +162,14 @@ argument_groups:
required: false
must_exist: false
description: Path to output directory
- default: $id.$key.read_1.fastq
+ default: $id.read_1.fastq
- name: "--qc_output2"
type: file
direction: output
required: false
must_exist: false
description: Path to output directory
- default: $id.$key.read_2.fastq
+ default: $id.read_2.fastq
- name: "--fastqc_html_1"
type: file
direction: output
@@ -246,7 +246,20 @@ argument_groups:
direction: output
description: Results from Salmon quant
default: $id.salmon_quant_output
-
+ - name: --trim_json
+ type: file
+ description: The fastp json format report file name
+ default: $id.fastp_out.json
+ direction: output
+ - name: --trim_html
+ type: file
+ description: The fastp html format report file name
+ default: $id.fastp_out.html
+ direction: output
+ - name: --merged_out
+ type: file
+ description: File name to store merged fastp output.
+ direction: output
resources:
- type: nextflow_script
path: main.nf
@@ -256,14 +269,18 @@ dependencies:
- name: fastqc
# repository: biobox
- name: umitools/umitools_extract
- # repository: biobox
+ - name: umi_tools/umi_tools_extract
+ repository: biobox
- name: trimgalore
# repository: biobox
- name: bbmap_bbsplit
+ # repository: biobox
- name: sortmerna
+ # repository: biobox
- name: fastp
repository: biobox
- name: fq_subsample
+ # repository: biobox
- name: salmon/salmon_quant
repository: biobox
diff --git a/src/workflows/pre_processing/main.nf b/src/workflows/pre_processing/main.nf
index caa9a42..6e54b3f 100644
--- a/src/workflows/pre_processing/main.nf
+++ b/src/workflows/pre_processing/main.nf
@@ -74,7 +74,8 @@ workflow run_wf {
"trim_zip_2": "trimmed_fastqc_zip_2",
"trim_html_1": "trimmed_fastqc_html_1",
"trim_html_2": "trimmed_fastqc_html_2"
- ]
+ ],
+ args: [gzip: true, fastqc: true]
)
// Trim reads using fastp
@@ -90,7 +91,7 @@ workflow run_wf {
],
toState: [
"fastq_1": "out1",
- // "fastq_2": "out2",
+ "fastq_2": "out2",
"failed_trim": "failed_out",
"failed_trim_unpaired1": "unpaired1",
"failed_trim_unpaired2": "unpaired2",
@@ -154,8 +155,10 @@ workflow run_wf {
runIf: { id, state -> state.strandedness == 'auto' },
fromState: { id, state ->
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
- [ input: input,
- extra_args: state.extra_fq_subsample_args ]
+ [
+ input: input,
+ extra_args: state.extra_fq_subsample_args
+ ]
},
toState: [
"subsampled_fastq_1": "output_1",
@@ -189,9 +192,8 @@ workflow run_wf {
def mates1 = state.paired ? state.subsampled_fastq_1 : null
def mates2 = state.paired ? state.subsampled_fastq_2 : null
[ unmated_reads: unmated_reads,
- mates1: state.fastq1,
- mates2: state.fastq2,
- targets: state.transcript_fasta,
+ mates1: mates1,
+ mates2: mates2,
gene_map: state.gtf,
index: state.salmon_index,
lib_type: state.lib_type ]
diff --git a/src/workflows/pre_processing/test_run.sh b/src/workflows/pre_processing/test_run.sh
index 7c94e9b..60fe689 100755
--- a/src/workflows/pre_processing/test_run.sh
+++ b/src/workflows/pre_processing/test_run.sh
@@ -2,30 +2,55 @@
# viash ns build --parallel --setup cb
-# nextflow run target/nextflow/workflows/pre_processing/main.nf \
-# --id RAP1_UNINDUCED_REP1 \
-# --input 'testData/minimal_test/input_fastq/SRR6357073_1.fastq.gz' \
-# --publish_dir "test_results/preprocessing_no_samplesheet" \
-# --umitools_bc_pattern "NNNN" \
-# -profile docker \
-# -resume
+echo "> Preparing reference data files"
+gunzip --keep testData/minimal_test/reference/genes.gtf.gz
+mkdir -p testData/minimal_test/reference/salmon_index
+tar -C testData/minimal_test/reference/salmon_index --strip-components 1 -xavf testData/minimal_test/reference/salmon.tar.gz --no-same-owner
# Test paired-end data
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
id,fastq_1,fastq_2,strandedness
-WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,reverse
-RAP1_IAA_30M_REP1,SRR6357076_1.fastq.gz,SRR6357076_2.fastq.gz,reverse
+WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,auto
+RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
HERE
+echo "> Test 1: Running workflow with trimgalore"
nextflow run target/nextflow/workflows/pre_processing/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
- --publish_dir "testData/paired_end_test" \
+ --publish_dir "test_results/pre_processing_test1" \
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
- --gtf testData/minimal_test/reference/gene_annotation.gtf \
- --salmon_index testData/minimal_test/reference/salmon.tar.gz \
+ --gtf testData/minimal_test/reference/genes.gtf \
+ --salmon_index testData/minimal_test/reference/salmon_index \
--skip_trimming false \
--trimmer trimgalore \
--remove_ribo_rna false \
+ --ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \
+ --skip_bbsplit false \
+ --bbsplit_index test_results/prepare_genome_test1/BBSplit_index \
-profile docker \
- -resume
\ No newline at end of file
+ -resume
+
+# echo "> Test 2: Running workflow with fastp"
+# nextflow run target/nextflow/workflows/pre_processing/main.nf \
+# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
+# --publish_dir "test_results/pre_processing_test2" \
+# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --gtf testData/minimal_test/reference/genes.gtf \
+# --salmon_index testData/minimal_test/reference/salmon_index \
+# --skip_trimming false \
+# --trimmer fastp \
+# --remove_ribo_rna false \
+# --ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \
+# --skip_bbsplit false \
+# --bbsplit_index test_results/output_test1/BBSplit_index \
+# -profile docker \
+# -resume
+
+echo "Removing reference data files"
+rm testData/minimal_test/reference/genes.gtf
+rm -r testData/minimal_test/reference/salmon_index
+
+# TODO: Fix error while running sortmerna component
+# docker: Error response from daemon: failed to create task for container: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: exec: "/bin/bash": stat /bin/bash: no such file or directory: unknown.
\ No newline at end of file
diff --git a/src/workflows/prepare_genome/config.vsh.yaml b/src/workflows/prepare_genome/config.vsh.yaml
index 0d1816e..f50b6b5 100644
--- a/src/workflows/prepare_genome/config.vsh.yaml
+++ b/src/workflows/prepare_genome/config.vsh.yaml
@@ -163,6 +163,7 @@ dependencies:
- name: salmon/salmon_index
repository: biobox
- name: kallisto/kallisto_index
+ # repository: biobox
runners:
- type: executable
diff --git a/src/workflows/prepare_genome/main.nf b/src/workflows/prepare_genome/main.nf
index bc2fa44..1f23a45 100644
--- a/src/workflows/prepare_genome/main.nf
+++ b/src/workflows/prepare_genome/main.nf
@@ -35,9 +35,15 @@ workflow run_wf {
// gff to gtf
| gffread.run (
runIf: {id, state -> !state.gtf && state.gff},
- fromState: [ "input": "annotation" ],
+ fromState: [
+ "input": "gff",
+ "genome": "fasta"
+ ],
toState: [ "gtf": "outfile" ],
- args: [output: "gene_annotation.gtf"]
+ args: [
+ outfile: "gene_annotation.gtf",
+ gtf_output: true
+ ]
)
| gtf_filter.run(
@@ -116,13 +122,19 @@ workflow run_wf {
| rsem_prepare_reference.run (
runIf: {id, state -> !state.transcript_fasta},
fromState: [
- "fasta": "fasta",
+ "reference_fasta_files": "fasta",
"gtf": "gtf"
],
- toState: [ "transcript_fasta": "transcript_fasta" ],
- key: "make_transcript_fasta",
- args: [transcript_fasta: "transcriptome.fasta"]
+ toState: [ "make_transcript_fasta_output": "output" ],
+ key: "make_transcript_fasta",
+ args: [reference_name: "genome"]
)
+ | map { id, state ->
+ def transcript_fasta = (!state.transcript_fasta) ?
+ state.make_transcript_fasta_output.listFiles().find{it.name == "genome.transcripts.fa"} :
+ state.transcript_fasta
+ [ id, state + [transcript_fasta: transcript_fasta] ]
+ }
// chromosome size and fai index
| getchromsizes.run (
@@ -195,11 +207,12 @@ workflow run_wf {
| rsem_prepare_reference.run (
runIf: {id, state -> !state.rsem_index && state.aligner == 'star_rsem'},
fromState: [
- "fasta": "fasta",
+ "reference_fasta_files": "fasta",
"gtf": "gtf"
],
- toState: [ "rsem_index": "rsem" ],
+ toState: [ "rsem_index": "output" ],
key: "generate_rsem_index",
+ args: [reference_name: "genome"]
)
// TODO: Uncompress HISAT2 index or generate from scratch if required
@@ -229,10 +242,7 @@ workflow run_wf {
// Uncompress Kallisto index or generate from scratch if required
| untar.run (
runIf: {id, state -> state.kallisto_index},
- fromState: [
- "input": "kallisto_index",
- "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size"
- ],
+ fromState: [ "input": "kallisto_index" ],
toState: [ "kallisto_index": "output" ],
key: "untar_kallisto_index",
args: [output: "Kallisto_index"]
diff --git a/src/workflows/prepare_genome/test_run.sh b/src/workflows/prepare_genome/test_run.sh
index ab3f7b3..4c274cd 100755
--- a/src/workflows/prepare_genome/test_run.sh
+++ b/src/workflows/prepare_genome/test_run.sh
@@ -1,26 +1,50 @@
#!/bin/bash
-viash ns build --setup cb --parallel
+# viash ns build --setup cb --parallel -q prepare_genome
+# echo "Test 1: Annotation file format - GTF"
+# nextflow run target/nextflow/workflows/prepare_genome/main.nf \
+# --id test1 \
+# --publish_dir "test_results/prepare_genome_test1" \
+# --fasta testData/minimal_test/reference/genome.fasta \
+# --gtf testData/minimal_test/reference/genes.gtf.gz \
+# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --genotype false \
+# --biotype gene_biotype \
+# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
+# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
+# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
+# -profile docker \
+# -resume
+
+# echo "Test 2: Annotation file format - GFF"
+# nextflow run target/nextflow/workflows/prepare_genome/main.nf \
+# --id test2 \
+# --publish_dir "test_results/prepare_genome_test2" \
+# --fasta testData/minimal_test/reference/genome.fasta \
+# --gff testData/minimal_test/reference/genes.gff.gz \
+# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --genotype false \
+# --biotype gene_biotype \
+# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
+# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
+# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
+# -profile docker \
+# -resume
+
+echo "Test 3: Annotation file format - GTF; Generate indices; Generate transcripts fasta"
nextflow run target/nextflow/workflows/prepare_genome/main.nf \
- --id ref \
- --publish_dir "testData/test_output" \
- --fasta testData/reference/genome.fasta \
- --gtf testData/reference/genes.gtf.gz \
- --additional_fasta testData/reference/gfp.fa.gz \
- --transcript_fasta testData/reference/transcriptome.fasta \
+ --id test3 \
+ --publish_dir "test_results/prepare_genome_test3" \
+ --fasta testData/minimal_test/reference/genome.fasta \
+ --gtf testData/minimal_test/reference/genes.gtf.gz \
+ --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
--genotype false \
--biotype gene_biotype \
- --bbsplit_fasta_list testData/reference/bbsplit_fasta_list.txt \
- --salmon_index testData/reference/salmon.tar.gz \
- # -profile docker \
- # -resume
- # --gff testData/reference/genes.gff.gz \
- # --prepare_tools_indices a,b,c \
- # --gene_bed "" \
- # --splicesites "" \
- # --star_index "" \
- # --bbsplit_index "" \
- # --rsem_index testData/reference/rsem.tar.gz \
- # --salmon_index testData/reference/salmon.tar.gz \
- # --hisat2_index testData/reference/hisat2.tar.gz \
+ --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
+ --pseudo_aligner kallisto \
+ --aligner star_rsem \
+ -profile docker \
+ -resume
diff --git a/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml
index 7a67ba8..976bc06 100644
--- a/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml
+++ b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml
@@ -59,11 +59,10 @@ argument_groups:
- name: "--pseudo_multiqc"
type: file
direction: output
- default: $id.quant.log
- name: "--quant_out_dir"
type: file
direction: output
- default: $id.salmon_quant
+ default: $id.quant
- name: "--salmon_quant_results_file"
type: file
direction: output
diff --git a/src/workflows/pseudo_alignment_and_quant/main.nf b/src/workflows/pseudo_alignment_and_quant/main.nf
index 18dd755..e275962 100644
--- a/src/workflows/pseudo_alignment_and_quant/main.nf
+++ b/src/workflows/pseudo_alignment_and_quant/main.nf
@@ -46,11 +46,16 @@ workflow run_wf {
lib_type: state.lib_type ]
},
toState: [
- "quant_results_dir": "output",
+ "quant_out_dir": "output",
"salmon_quant_results_file": "quant_results"
]
)
+ | map { id, state ->
+ def mod_state = (state.pseudo_aligner == 'salmon') ? state + [pseudo_multiqc: state.quant_out_dir] : state
+ [ id, mod_state ]
+ }
+
| kallisto_quant.run (
runIf: { id, state -> state.pseudo_aligner == 'kallisto'},
fromState: [
diff --git a/src/workflows/pseudo_alignment_and_quant/test_run.sh b/src/workflows/pseudo_alignment_and_quant/test_run.sh
index ff0c09a..cdc2bd0 100755
--- a/src/workflows/pseudo_alignment_and_quant/test_run.sh
+++ b/src/workflows/pseudo_alignment_and_quant/test_run.sh
@@ -1,26 +1,45 @@
#!/bin/bash
-viash ns build --setup cb -q pseudo_alignment_and_quant
+# viash ns build --setup cb -q pseudo_alignment_and_quant
# Split error message from standard output
# viash ns list > /dev/null
-CURR=`pwd`
+echo "> Preparing reference data files"
+gunzip --keep testData/minimal_test/reference/genes.gtf.gz
+mkdir -p testData/minimal_test/reference/salmon_index
+tar -C testData/minimal_test/reference/salmon_index --strip-components 1 -xavf testData/minimal_test/reference/salmon.tar.gz
-# Test paired-end data
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
id,fastq_1,fastq_2,strandedness
WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse
-WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,reverse
+RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
HERE
+echo "> Test 1: Salmon quantification"
nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
- --publish_dir "test_results/psudo_alignment_test" \
+ --publish_dir "test_results/pseudo_alignment_test1" \
--fasta testData/minimal_test/reference/genome.fasta \
--gtf testData/minimal_test/reference/genes.gtf.gz \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
- --salmon_index testData/minimal_test/reference/salmon.tar.gz \
+ --salmon_index testData/minimal_test/reference/salmon_index \
--pseudo_aligner salmon \
-profile docker \
- # -resume
\ No newline at end of file
+ -resume
+
+# echo "> Test 2: Kallisto quantification"
+# nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
+# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
+# --publish_dir "test_results/pseudo_alignment_test2" \
+# --fasta testData/minimal_test/reference/genome.fasta \
+# --gtf testData/minimal_test/reference/genes.gtf.gz \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --kallisto_index test_results/prepare_genome_test3/Kallisto_index \
+# --pseudo_aligner kallisto \
+# -profile docker \
+# -resume
+
+echo "Removing reference data files"
+rm testData/minimal_test/reference/genes.gtf
+rm -r testData/minimal_test/reference/salmon_index
diff --git a/src/workflows/quality_control/config.vsh.yaml b/src/workflows/quality_control/config.vsh.yaml
index 072d347..63ccddb 100644
--- a/src/workflows/quality_control/config.vsh.yaml
+++ b/src/workflows/quality_control/config.vsh.yaml
@@ -41,10 +41,10 @@ argument_groups:
description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.
- name: "--quant_out_dir"
type: file
- description: Directory containing quantification results.
+ description: Directory containing Salmon quantification results.
- name: "--quant_results_file"
type: file
- description: Quantification file.
+ description: Salmon quantification file.
- name: "--pseudo_quant_out_dir"
type: file
description: Directory containing quantification results for pseudo alignment.
@@ -59,8 +59,14 @@ argument_groups:
description: Method used for alognment and qqunatification.
- name: "--pseudo_aligner"
type: string
- description: Method used for [seudo alignment and quantification.
-
+ description: Method used for pseudo alignment and quantification.
+ - name: "--rsem_counts_gene"
+ type: file
+ description: Expression counts on gene level
+ - name: "--rsem_counts_transcripts"
+ type: file
+ description: Expression counts on transcript level
+
- name: "--skip_qc"
type: boolean
default: false
@@ -258,11 +264,6 @@ argument_groups:
type: string
- name: "--multiqc_methods_description"
type: file
- - name: "--mqc_yml"
- type: file
- description: Software versions
- - name: "--workflow_summary"
- type: file
- name: "--passed_trimmed_reads"
type: boolean
- name: "--num_trimmed_reads"
@@ -297,8 +298,8 @@ argument_groups:
must_exist: false
# - name: "--hisat2_multiqc"
# type: file
- # - name: "--rsem_multiqc"
- # type: file
+ - name: "--rsem_multiqc"
+ type: file
- name: "--genome_bam_stats"
type: file
must_exist: false
@@ -542,9 +543,6 @@ argument_groups:
type: file
direction: output
default: multiqc_plots
- - name: "--multiqc_versions"
- type: file
- direction: output
# Biotype QC
- name: "--featurecounts"
@@ -600,27 +598,27 @@ argument_groups:
- name: "--pseudo_tpm_gene"
type: file
direction: output
- default: salmon.merged.pseudo_gene_tpm.tsv
+ default: pseudo_gene_tpm.tsv
- name: "--pseudo_counts_gene"
type: file
direction: output
- default: salmon.merged.pseudo_gene_counts.tsv
+ default: pseudo_gene_counts.tsv
- name: "--pseudo_counts_gene_length_scaled"
type: file
direction: output
- default: salmon.merged.pseudo_gene_counts_length_scaled.tsv
+ default: pseudo_gene_counts_length_scaled.tsv
- name: "--pseudo_counts_gene_scaled"
type: file
direction: output
- default: salmon.merged.pseudo_gene_counts_scaled.tsv
+ default: pseudo_gene_counts_scaled.tsv
- name: "--pseudo_tpm_transcript"
type: file
direction: output
- default: salmon.merged.pseudo_transcript_tpm.tsv
+ default: pseudo_transcript_tpm.tsv
- name: "--pseudo_counts_transcript"
type: file
direction: output
- default: salmon.merged.pseudo_transcript_counts.tsv
+ default: pseudo_transcript_counts.tsv
- name: "--pseudo_quant_merged_summarizedexperiment"
type: file
direction: output
@@ -642,6 +640,8 @@ dependencies:
- name: rseqc/rseqc_tin
- name: dupradar
- name: qualimap
+ # - name: qualimap/qualimap_rnaseq
+ # repository: biobox
- name: preseq_lcextrap
- name: featurecounts
repository: biobox
@@ -650,6 +650,7 @@ dependencies:
- name: prepare_multiqc_input
- name: multiqc
repository: biobox
+ - name: rsem/rsem_merge_counts
- name: workflows/merge_quant_results
runners:
diff --git a/src/workflows/quality_control/main.nf b/src/workflows/quality_control/main.nf
index 0032061..c142272 100644
--- a/src/workflows/quality_control/main.nf
+++ b/src/workflows/quality_control/main.nf
@@ -55,7 +55,7 @@ workflow run_wf {
"input": "genome_bam",
"extra_preseq_args": "extra_preseq_args"
],
- toState: [ "preseq_output": "output" ],
+ toState: [ "preseq_output": "output" ]
)
| rseqc_bamstat.run (
@@ -214,10 +214,49 @@ workflow run_wf {
merged_ch = qc_ch
| toSortedList
-
| map { list ->
def ids = list.collect { id, state -> state.id }
def strandedness = list.collect { id, state -> state.strandedness }
+ def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads }
+ def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads }
+ def passed_mapping = list.collect { id, state -> state.passed_mapping }
+ def percent_mapped = list.collect { id, state -> state.percent_mapped }
+ def inferred_strand = list.collect { id, state -> state.inferred_strand }
+ def passed_strand_check = list.collect { id, state -> state.passed_strand_check }
+ def gtf = list.collect { id, state -> state.gtf }.unique()[0]
+ def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0]
+ def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0]
+ def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0]
+ def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0]
+ def aligner = list.collect { id, state -> state.aligner }.unique()[0]
+ def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0]
+ def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0]
+ def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0]
+ def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0]
+ def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0]
+ def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0]
+ def skip_align = list.collect { id, state -> state.skip_align }.unique()[0]
+ def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0]
+ def quant_results = list.collect { id, state ->
+ (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ?
+ state.quant_results_file :
+ null }
+ def rsem_counts_gene = list.collect { id, state ->
+ (state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ?
+ state.rsem_counts_gene :
+ null }
+ def rsem_counts_transcripts = list.collect { id, state ->
+ (state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ?
+ state.rsem_counts_transcripts :
+ null }
+ def pseudo_salmon_quant_results = list.collect { id, state ->
+ (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ?
+ state.pseudo_salmon_quant_results_file :
+ null }
+ def pseudo_kallisto_quant_results = list.collect { id, state ->
+ (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ?
+ state.pseudo_kallisto_quant_results_file :
+ null }
def fastqc_zip_1 = list.collect { id, state ->
(state.fastqc_zip_1 instanceof java.nio.file.Path && state.fastqc_zip_1.exists()) ?
state.fastqc_zip_1 :
@@ -242,26 +281,14 @@ workflow run_wf {
(state.trim_log_2 instanceof java.nio.file.Path && state.trim_log_2.exists()) ?
state.trim_log_2 :
null }
- def sortmerna_log = list.collect { id, state ->
- (state.sortmerna_log instanceof java.nio.file.Path && state.sortmerna_log.exists()) ?
- state.sortmerna_log :
+ def sortmerna_multiqc = list.collect { id, state ->
+ (state.sortmerna_multiqc instanceof java.nio.file.Path && state.sortmerna_multiqc.exists()) ?
+ state.sortmerna_multiqc :
null }
def star_multiqc = list.collect { id, state ->
(state.star_multiqc instanceof java.nio.file.Path && state.star_multiqc.exists()) ?
state.star_multiqc :
null }
- def quant_results = list.collect { id, state ->
- (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ?
- state.quant_results_file :
- null }
- def pseudo_salmon_quant_results = list.collect { id, state ->
- (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ?
- state.pseudo_salmon_quant_results_file :
- null }
- def pseudo_kallisto_quant_results = list.collect { id, state ->
- (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ?
- state.pseudo_kallisto_quant_results_file :
- null }
def genome_bam_stats = list.collect { id, state ->
(state.genome_bam_stats instanceof java.nio.file.Path && state.genome_bam_stats.exists()) ?
state.genome_bam_stats :
@@ -278,6 +305,14 @@ workflow run_wf {
(state.markduplicates_multiqc instanceof java.nio.file.Path && state.markduplicates_multiqc.exists()) ?
state.markduplicates_multiqc :
null }
+ def salmon_multiqc = list.collect { id, state ->
+ (state.salmon_multiqc instanceof java.nio.file.Path && state.salmon_multiqc.exists()) ?
+ state.salmon_multiqc :
+ null }
+ def rsem_multiqc = list.collect { id, state ->
+ (state.rsem_multiqc instanceof java.nio.file.Path && state.rsem_multiqc.exists()) ?
+ state.rsem_multiqc :
+ null }
def pseudo_multiqc = list.collect { id, state ->
(state.pseudo_multiqc instanceof java.nio.file.Path && state.pseudo_multiqc.exists()) ?
state.pseudo_multiqc :
@@ -338,40 +373,45 @@ workflow run_wf {
(state.tin_output_summary instanceof java.nio.file.Path && state.tin_output_summary.exists()) ?
state.tin_output_summary :
null }
- def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads }
- def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads }
- def passed_mapping = list.collect { id, state -> state.passed_mapping }
- def percent_mapped = list.collect { id, state -> state.percent_mapped }
- def inferred_strand = list.collect { id, state -> state.inferred_strand }
- def passed_strand_check = list.collect { id, state -> state.passed_strand_check }
- def gtf = list.collect { id, state -> state.gtf }.unique()[0]
- def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0]
- def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0]
- def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0]
- def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc } .unique()[0]
- def aligner = list.collect { id, state -> state.aligner } .unique()[0]
- def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner } .unique()[0]
- def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0]
- def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0]
- def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0]
- def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0]
def multiqc_custom_config = list.collect { id, state -> state.multiqc_custom_config }.unique()[0]
- def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0]
- def skip_align = list.collect { id, state -> state.skip_align }.unique()[0]
- def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0]
["merged", [
ids: ids,
strandedness: strandedness,
+ num_trimmed_reads: num_trimmed_reads,
+ passed_trimmed_reads: passed_trimmed_reads,
+ passed_mapping: passed_mapping,
+ percent_mapped: percent_mapped,
+ inferred_strand: inferred_strand,
+ passed_strand_check: passed_strand_check,
+ skip_align: skip_align,
+ skip_pseudo_align: skip_pseudo_align,
+ quant_results: quant_results,
+ rsem_counts_gene: rsem_counts_gene,
+ rsem_counts_transcripts: rsem_counts_transcripts,
+ pseudo_salmon_quant_results: pseudo_salmon_quant_results,
+ pseudo_kallisto_quant_results: pseudo_kallisto_quant_results,
+ gtf: gtf,
+ gtf_extra_attributes: gtf_extra_attributes,
+ gtf_group_features: gtf_group_features,
+ pca_header_multiqc: pca_header_multiqc,
+ clustering_header_multiqc: clustering_header_multiqc,
+ aligner: aligner,
+ pseudo_aligner: pseudo_aligner,
+ deseq2_vst: deseq2_vst,
+ extra_deseq2_args: extra_deseq2_args,
+ extra_deseq2_args2: extra_deseq2_args2,
+ skip_deseq2_qc: skip_deseq2_qc,
fastqc_zip: fastqc_zip_1 + fastqc_zip_2,
trim_zip: trim_zip_1 + trim_zip_2,
trim_log: trim_log_1 + trim_log_2,
- sortmerna_log: sortmerna_log,
+ sortmerna_multiqc: sortmerna_multiqc,
star_multiqc: star_multiqc,
- salmon_multiqc: quant_results,
genome_bam_stats: genome_bam_stats,
genome_bam_flagstat: genome_bam_flagstat,
genome_bam_idxstats: genome_bam_idxstats,
markduplicates_multiqc: markduplicates_multiqc,
+ salmon_multiqc: salmon_multiqc,
+ rsem_multiqc: rsem_multiqc,
pseudo_multiqc: pseudo_multiqc,
featurecounts_multiqc: featurecounts_multiqc,
featurecounts_rrna_multiqc: featurecounts_rrna_multiqc,
@@ -387,32 +427,114 @@ workflow run_wf {
read_distribution_output: read_distribution_output,
read_duplication_output_duplication_rate_mapping: read_duplication_output_duplication_rate_mapping,
tin_output_summary: tin_output_summary,
- quant_results: quant_results,
- pseudo_salmon_quant_results: pseudo_salmon_quant_results,
- pseudo_kallisto_quant_results: pseudo_kallisto_quant_results,
- gtf: gtf,
- gtf_extra_attributes: gtf_extra_attributes,
- gtf_group_features: gtf_group_features,
- pca_header_multiqc: pca_header_multiqc,
- clustering_header_multiqc: clustering_header_multiqc,
- aligner: aligner,
- pseudo_aligner: pseudo_aligner,
- deseq2_vst: deseq2_vst,
- extra_deseq2_args: extra_deseq2_args,
- extra_deseq2_args2: extra_deseq2_args2,
- skip_deseq2_qc: skip_deseq2_qc,
- num_trimmed_reads: num_trimmed_reads,
- passed_trimmed_reads: passed_trimmed_reads,
- passed_mapping: passed_mapping,
- percent_mapped: percent_mapped,
- inferred_strand: inferred_strand,
- passed_strand_check: passed_strand_check,
- multiqc_custom_config: multiqc_custom_config,
- skip_align: skip_align,
- skip_pseudo_align: skip_pseudo_align
+ multiqc_custom_config: multiqc_custom_config
] ]
}
-
+
+ // | map { list ->
+ // def ids = list.collect { id, state -> state.id }
+ // def strandedness = list.collect { id, state -> state.strandedness }
+ // def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads }
+ // def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads }
+ // def passed_mapping = list.collect { id, state -> state.passed_mapping }
+ // def percent_mapped = list.collect { id, state -> state.percent_mapped }
+ // def inferred_strand = list.collect { id, state -> state.inferred_strand }
+ // def passed_strand_check = list.collect { id, state -> state.passed_strand_check }
+ // def gtf = list.collect { id, state -> state.gtf }.unique()[0]
+ // def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0]
+ // def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0]
+ // def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0]
+ // def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0]
+ // def aligner = list.collect { id, state -> state.aligner }.unique()[0]
+ // def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0]
+ // def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0]
+ // def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0]
+ // def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0]
+ // def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0]
+ // def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0]
+ // def skip_align = list.collect { id, state -> state.skip_align }.unique()[0]
+ // def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0]
+ // def quant_results = list.collect { id, state ->
+ // (state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ?
+ // state.quant_results_file :
+ // null }
+ // def rsem_counts_gene = list.collect { id, state ->
+ // (state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ?
+ // state.rsem_counts_gene :
+ // null }
+ // def rsem_counts_transcripts = list.collect { id, state ->
+ // (state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ?
+ // state.rsem_counts_transcripts :
+ // null }
+ // def pseudo_salmon_quant_results = list.collect { id, state ->
+ // (state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ?
+ // state.pseudo_salmon_quant_results_file :
+ // null }
+ // def pseudo_kallisto_quant_results = list.collect { id, state ->
+ // (state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ?
+ // state.pseudo_kallisto_quant_results_file :
+ // null }
+ // def fastqc_zip_1_dirs = list.collect{it[1].fastqc_zip_1.getParent()}
+ // def fastqc_zip_2_dirs = list.collect{it[1].fastqc_zip_2.getParent()}
+ // def trim_zip_1_dirs = list.collect{it[1].trim_zip_1.getParent()}
+ // def trim_zip_2_dirs = list.collect{it[1].trim_zip_2.getParent()}
+ // def trim_log_1_dirs = list.collect{it[1].trim_log_1.getParent()}
+ // def trim_log_2_dirs = list.collect{it[1].trim_log_2.getParent()}
+ // def sortmerna_multiqc_dirs = list.collect{it[1].sortmerna_multiqc.getParent()}
+ // def star_multiqc_dirs = list.collect{it[1].star_multiqc.getParent()}
+ // def genome_bam_stats_dirs = list.collect{it[1].genome_bam_stats.getParent()}
+ // def genome_bam_flagstat_dirs = list.collect{it[1].genome_bam_flagstat.getParent()}
+ // def genome_bam_idxstats_dirs = list.collect{it[1].genome_bam_idxstats}
+ // def markduplicates_multiqc_dirs = list.collect{it[1].markduplicates_multiqc.getParent()}
+ // def salmon_multiqc_dirs = list.collect{it[1].salmon_multiqc}
+ // def rsem_multiqc_dirs = list.collect{it[1].rsem_multiqc.getParent()}
+ // def pseudo_multiqc_dirs = list.collect{it[1].pseudo_multiqc.getParent()}
+ // def featurecounts_multiqc_dirs = list.collect{it[1].featurecounts_multiqc.getParent()}
+ // def featurecounts_rrna_multiqc_dirs = list.collect{it[1].featurecounts_rrna_multiqc.getParent()}
+ // def preseq_output_dirs = list.collect{it[1].preseq_output.getParent()}
+ // def qualimap_output_dirs = list.collect{it[1].qualimap_output_dir}
+ // def dupradar_output_dup_intercept_mqc_dirs = list.collect{it[1].dupradar_output_dup_intercept_mqc.getParent()}
+ // def dupradar_output_duprate_exp_denscurve_mqc_dirs = list.collect{it[1].dupradar_output_duprate_exp_denscurve_mqc.getParent()}
+ // def bamstat_output_dirs = list.collect{it[1].bamstat_output.getParent()}
+ // def strandedness_output_dirs = list.collect{it[1].strandedness_output.getParent()}
+ // def inner_dist_output_freq_dirs = list.collect{it[1].inner_dist_output_freq.getParent()}
+ // def junction_annotation_output_log_dirs = list.collect{it[1].junction_annotation_output_log.getParent()}
+ // def junction_saturation_output_plot_r_dirs = list.collect{it[1].junction_saturation_output_plot_r.getParent()}
+ // def read_distribution_output_dirs = list.collect{it[1].read_distribution_output.getParent()}
+ // def read_duplication_output_duplication_rate_mapping_dirs = list.collect{it[1].read_duplication_output_duplication_rate_mapping.getParent()}
+ // def tin_output_summary_dirs = list.collect{it[1].tin_output_summary.getParent()}
+ // def multiqc_custom_config_dirs = list.collect{it[1].multiqc_custom_config.getParent()}
+ // ["merged", [
+ // ids: ids,
+ // strandedness: strandedness,
+ // num_trimmed_reads: num_trimmed_reads,
+ // passed_trimmed_reads: passed_trimmed_reads,
+ // passed_mapping: passed_mapping,
+ // percent_mapped: percent_mapped,
+ // inferred_strand: inferred_strand,
+ // passed_strand_check: passed_strand_check,
+ // skip_align: skip_align,
+ // skip_pseudo_align: skip_pseudo_align,
+ // quant_results: quant_results,
+ // rsem_counts_gene: rsem_counts_gene,
+ // rsem_counts_transcripts: rsem_counts_transcripts,
+ // pseudo_salmon_quant_results: pseudo_salmon_quant_results,
+ // pseudo_kallisto_quant_results: pseudo_kallisto_quant_results,
+ // gtf: gtf,
+ // gtf_extra_attributes: gtf_extra_attributes,
+ // gtf_group_features: gtf_group_features,
+ // pca_header_multiqc: pca_header_multiqc,
+ // clustering_header_multiqc: clustering_header_multiqc,
+ // aligner: aligner,
+ // pseudo_aligner: pseudo_aligner,
+ // deseq2_vst: deseq2_vst,
+ // extra_deseq2_args: extra_deseq2_args,
+ // extra_deseq2_args2: extra_deseq2_args2,
+ // skip_deseq2_qc: skip_deseq2_qc,
+ // multiqc_input: fastqc_zip_1_dirs + fastqc_zip_2_dirs + trim_zip_1_dirs + trim_zip_2_dirs + trim_log_1_dirs + trim_log_2_dirs + sortmerna_multiqc_dirs + star_multiqc_dirs + genome_bam_stats_dirs + genome_bam_flagstat_dirs + genome_bam_idxstats_dirs + markduplicates_multiqc_dirs + salmon_multiqc_dirs + rsem_multiqc_dirs + pseudo_multiqc_dirs + featurecounts_multiqc_dirs + featurecounts_rrna_multiqc_dirs + preseq_output_dirs + qualimap_output_dirs + dupradar_output_dup_intercept_mqc_dirs + dupradar_output_duprate_exp_denscurve_mqc_dirs + bamstat_output_dirs + strandedness_output_dirs + inner_dist_output_freq_dirs + junction_annotation_output_log_dirs + junction_saturation_output_plot_r_dirs + read_distribution_output_dirs + read_duplication_output_duplication_rate_mapping_dirs + tin_output_summary_dirs + multiqc_custom_config_dirs
+ // ] ]
+ // }
+
// Merge quantification results of alignment
| merge_quant_results.run (
runIf: { id, state -> !state.skip_align && state.aligner == 'star_salmon' },
@@ -434,19 +556,36 @@ workflow run_wf {
"lengths_transcript": "lengths_transcript",
"quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment"
],
- key: "merge_qunat_results"
+ key: "merge_quant_results"
+ )
+
+ | rsem_merge_counts.run ( // merge the RSEM count files collected across samples; only relevant for the star_rsem aligner
+ runIf: { id, state -> !state.skip_align && state.aligner == 'star_rsem' }, // same skip_align guard as merge_quant_results and deseq2_qc
+ fromState: [
+ "counts_gene": "rsem_counts_gene",
+ "counts_transcripts": "rsem_counts_transcripts"
+ ],
+ toState: [
+ "tpm_gene": "merged_gene_tpm",
+ "counts_gene": "merged_gene_counts",
+ "tpm_transcript": "merged_transcript_tpm",
+ "counts_transcript": "merged_transcript_counts"
+ ]
)
| deseq2_qc.run (
runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_align },
- fromState: [
- "counts": "counts_gene_length_scaled",
- "pca_header_multiqc": "pca_header_multiqc",
- "clustering_header_multiqc": "clustering_header_multiqc",
- "deseq2_vst": "deseq2_vst",
- "extra_deseq2_args": "extra_deseq2_args",
- "extra_deseq2_args2": "extra_deseq2_args2"
- ],
+ fromState: { id, state ->
+ def counts = (state.aligner == "star_rsem") ? state.counts_gene : state.counts_gene_length_scaled
+ [
+ counts: counts,
+ pca_header_multiqc: state.pca_header_multiqc,
+ clustering_header_multiqc: state.clustering_header_multiqc,
+ deseq2_vst: state.deseq2_vst,
+ extra_deseq2_args: state.extra_deseq2_args,
+ extra_deseq2_args2: state.extra_deseq2_args2
+ ]
+ },
toState: [
"deseq2_output": "deseq2_output",
"deseq2_pca_multiqc": "pca_multiqc",
@@ -547,14 +686,15 @@ workflow run_wf {
"fastqc_raw_multiqc": "fastqc_zip",
"fastqc_trim_multiqc": "trim_zip",
"trim_log_multiqc": "trim_log",
- "sortmerna_multiqc": "sortmerna_log",
+ "sortmerna_multiqc": "sortmerna_multiqc",
"star_multiqc": "star_multiqc",
"salmon_multiqc": "salmon_multiqc",
+ "rsem_multiqc": "rsem_multiqc",
+ "pseudo_multiqc": "pseudo_multiqc",
"samtools_stats": "genome_bam_stats",
"samtools_flagstat": "genome_bam_flagstat",
"samtools_idxstats": "genome_bam_idxstats",
- "markduplicates_multiqc": "markduplicates_multiqc",
- "pseudo_multiqc": "pseudo_multiqc",
+ "markduplicates_multiqc": "markduplicates_multiqc",
"featurecounts_multiqc": "featurecounts_multiqc",
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc",
"aligner_pca_multiqc": "deseq2_pca_multiqc",
@@ -583,10 +723,11 @@ workflow run_wf {
"title": "multiqc_title",
"input": "multiqc_input",
],
+ args: [exclude_modules: "general_stats"],
toState: [
"multiqc_report": "output_report",
"multiqc_data": "output_data",
- "multiqc_plots": "output_plots",
+ "multiqc_plots": "output_plots"
]
)
@@ -688,8 +829,7 @@ workflow run_wf {
"deseq2_output_pseudo": "deseq2_output_pseudo",
"multiqc_report": "multiqc_report",
"multiqc_data": "multiqc_data",
- "multiqc_plots": "multiqc_plots",
- "multiqc_versions": "multiqc_versions"
+ "multiqc_plots": "multiqc_plots"
]
)
@@ -749,3 +889,5 @@ def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) {
return [ strandedness, sense, antisense, undetermined ]
}
+
+
diff --git a/src/workflows/rnaseq/config.vsh.yaml b/src/workflows/rnaseq/config.vsh.yaml
index f8a6029..0060fda 100644
--- a/src/workflows/rnaseq/config.vsh.yaml
+++ b/src/workflows/rnaseq/config.vsh.yaml
@@ -233,11 +233,7 @@ argument_groups:
- name: "--skip_pseudo_alignment"
type: boolean_true
description: Skip all of the pseudo-alignment-based processes within the pipeline.
- - name: extra_rsem_prepare_reference_args
- type: string
- description: Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline.
- default: '--star'
- - name: extra_rsem_calculate_expression_args
+ - name: --extra_rsem_calculate_expression_args
type: string
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
default: '--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1'
@@ -349,42 +345,45 @@ argument_groups:
- name: Output
arguments:
+ # Reference files
- name: "--output_fasta"
type: file
direction: output
- default: genome/reference_genome.fasta
+ default: reference/genome.fasta
- name: "--output_gtf"
type: file
direction: output
- default: genome/gene_annotation.gtf
+ default: reference/gene_annotation.gtf
- name: "--output_transcript_fasta"
type: file
direction: output
- default: genome/transcriptome.fasta
+ default: reference/transcriptome.fasta
- name: "--output_gene_bed"
type: file
direction: output
- default: genome/gene_annotation.bed
+ default: reference/gene_annotation.bed
- name: "--output_star_index"
type: file
direction: output
description: Path to STAR index.
- default: genome/index/STAR
+ default: reference/index/STAR
- name: "--output_salmon_index"
type: file
direction: output
description: Path to Salmon index.
- default: genome/index/Salmon
+ default: reference/index/Salmon
- name: "--output_bbsplit_index"
type: file
direction: output
description: Path to BBSplit index.
- default: genome/index/BBSplit
+ default: reference/index/BBSplit
- name: "--output_kallisto_index"
type: file
direction: output
description: Path to Kallisto index.
- default: genome/index/Kallisto
+ default: reference/index/Kallisto
+
+ # fastq
- name: "--output_fastq_1"
type: file
direction: output
@@ -399,6 +398,8 @@ argument_groups:
must_exist: false
description: Path to output directory
default: fastq/$id.read_2.fastq
+
+ # FastQC
- name: "--fastqc_html_1"
type: file
direction: output
@@ -427,18 +428,6 @@ argument_groups:
required: false
must_exist: false
default: fastqc_raw/$id.read_2.fastqc.zip
- - name: "--trim_log_1"
- type: file
- direction: output
- required: false
- must_exist: false
- default: trimgalore/$id.read_1.trimming_report.txt
- - name: "--trim_log_2"
- type: file
- direction: output
- required: false
- must_exist: false
- default: trimgalore/$id.read_2.trimming_report.txt
- name: "--trim_html_1"
type: file
direction: output
@@ -463,6 +452,34 @@ argument_groups:
required: false
must_exist: false
default: fastqc_trim/$id.read_2.trimmed_fastqc.zip
+
+ # TrimGalore
+ - name: "--trim_log_1"
+ type: file
+ direction: output
+ required: false
+ must_exist: false
+ default: trimgalore/$id.read_1.trimming_report.txt
+ - name: "--trim_log_2"
+ type: file
+ direction: output
+ required: false
+ must_exist: false
+ default: trimgalore/$id.read_2.trimming_report.txt
+
+ # fastp
+ - name: --fastp_trim_json
+ type: file
+ description: The fastp json format report file name
+ default: fastp/$id_out.json
+ direction: output
+ - name: --fastp_trim_html
+ type: file
+ description: The fastp html format report file name
+ default: fastp/$id_out.html
+ direction: output
+
+ # SortMeRNA
- name: "--sortmerna_log"
type: file
direction: output
@@ -470,18 +487,30 @@ argument_groups:
required: false
must_exist: false
description: Sortmerna log file.
+
+ # STAR
- name: "--star_alignment"
type: file
direction: output
- default: STAR_alignment/$id
+ default: STAR/$id
- name: "--genome_bam_sorted"
type: file
direction: output
- default: STAR_alignment/genome_processed/$id.genome.bam
+ default: STAR/genome_processed/$id.genome.bam
- name: "--genome_bam_index"
type: file
direction: output
- default: STAR_alignment/genome_processed/$id.genome.bam.bai
+ default: STAR/genome_processed/$id.genome.bam.bai
+ - name: "--transcriptome_bam"
+ type: file
+ direction: output
+ default: STAR/transcriptome_processed/$id.transcriptome.bam
+ - name: "--transcriptome_bam_index"
+ type: file
+ direction: output
+ default: STAR/transcriptome_processed/$id.transcriptome.bam.bai
+
+ # samtools
- name: "--genome_bam_stats"
type: file
direction: output
@@ -494,14 +523,6 @@ argument_groups:
type: file
direction: output
default: samtools_stats/$id.genome.idxstats
- - name: "--transcriptome_bam"
- type: file
- direction: output
- default: STAR_alignment/transcriptome_processed/$id.transcriptome.bam
- - name: "--transcriptome_bam_index"
- type: file
- direction: output
- default: STAR_alignment/transcriptome_processed/$id.transcriptome.bam.bai
- name: "--transcriptome_bam_stats"
type: file
direction: output
@@ -514,42 +535,85 @@ argument_groups:
type: file
direction: output
default: samtools_stats/$id.transcriptome.idxstats
+
+ # Transcript quantification
- name: "--salmon_quant_results"
type: file
direction: output
- default: salmon/$id
+ default: STAR_Salmon/$id
+ - name: "--salmon_quant_results_file"
+ type: file
+ direction: output
+ default: STAR_Salmon/$id/quant.sf
+ - name: "--pseudo_quant_results"
+ type: file
+ direction: output
+ default: Pseudo_align_quant/$id
+
+ # RSEM
+ - name: "--rsem_counts_gene"
+ type: file
+ description: Expression counts on gene level
+ default: RSEM/$id.genes.results
+ direction: output
+ - name: "--rsem_counts_transcripts"
+ type: file
+ description: Expression counts on transcript level
+ default: RSEM/$id.isoforms.results
+ direction: output
+ - name: "--bam_star_rsem"
+ type: file
+ description: BAM file generated by STAR (from RSEM)
+ default: RSEM/$id.STAR.genome.bam
+ direction: output
+ - name: "--bam_genome_rsem"
+ type: file
+ description: Genome BAM file (from RSEM)
+ default: RSEM/$id.genome.bam
+ direction: output
+ - name: "--bam_transcript_rsem"
+ type: file
+ description: Transcript BAM file (from RSEM)
+ default: RSEM/$id.transcript.bam
+ direction: output
+
+ # Quantification (alignment)
- name: "--tpm_gene"
type: file
direction: output
- default: salmon/gene_tpm.tsv
+ default: transcript_quantification/gene_tpm.tsv
- name: "--counts_gene"
type: file
direction: output
- default: salmon/gene_counts.tsv
+ default: transcript_quantification/gene_counts.tsv
- name: "--counts_gene_length_scaled"
type: file
direction: output
- default: salmon/gene_counts_length_scaled.tsv
+ default: transcript_quantification/gene_counts_length_scaled.tsv
- name: "--counts_gene_scaled"
type: file
direction: output
- default: salmon/gene_counts_scaled.tsv
+ default: transcript_quantification/gene_counts_scaled.tsv
- name: "--tpm_transcript"
type: file
direction: output
- default: salmon/transcript_tpm.tsv
+ default: transcript_quantification/transcript_tpm.tsv
- name: "--counts_transcript"
type: file
direction: output
- default: salmon/transcript_counts.tsv
- - name: "--salmon_merged_summarizedexperiment"
+ default: transcript_quantification/transcript_counts.tsv
+ - name: "--quant_merged_summarizedexperiment"
type: file
direction: output
- default: salmon/summarizedexperiment
+ default: transcript_quantification/summarizedexperiment
+
+ # MarkDuplicates
- name: "--markduplicates_metrics"
type: file
direction: output
- default: picard/$id.sorted.MarkDuplicates.metrics.txt
+ default: picard/$id.MarkDuplicates.metrics.txt
+
+ # StringTie
- name: "--stringtie_transcript_gtf"
type: file
direction: output
@@ -566,6 +630,8 @@ argument_groups:
type: file
direction: output
default: stringtie/$id.ballgown
+
+ # featureCounts
- name: "--featurecounts"
type: file
direction: output
@@ -584,6 +650,8 @@ argument_groups:
direction: output
must_exist: false
default: featurecounts/$id.featureCounts_rrna_mqc.tsv
+
+ # bedGraph
- name: "--bedgraph_forward"
type: file
direction: output
@@ -592,6 +660,8 @@ argument_groups:
type: file
direction: output
default: bedgraph/$id.reverse.bedgraph
+
+ # bigWig
- name: "--bigwig_forward"
type: file
direction: output
@@ -600,10 +670,14 @@ argument_groups:
type: file
direction: output
default: bigwig/$id.reverse.bigwig
+
+ # preseq lc_extrap
- name: "--preseq_output"
type: file
direction: output
default: preseq/$id.lc_extrap.txt
+
+ # RSeQC
- name: "--bamstat_output"
type: file
direction: output
@@ -742,6 +816,8 @@ argument_groups:
required: false
default: RSeQC/tin/xls/$id.tin.xls
description: file with TIN metrics (xls)
+
+ # DupRadar
- name: "--dupradar_output_dupmatrix"
type: file
direction: output
@@ -783,6 +859,8 @@ argument_groups:
direction: output
required: false
default: dupradar/intercept_slope/$id.intercept_slope.txt
+
+ # Qualimap
- name: "--qualimap_output_pdf"
type: file
direction: output
@@ -794,10 +872,14 @@ argument_groups:
direction: output
required: false
default: qualimap/$id
+
+ # DESeq2
- name: "--deseq2_output"
type: file
direction: output
default: deseq2_qc
+
+ # MultiQC
- name: "--multiqc_report"
type: file
direction: output
@@ -813,11 +895,33 @@ argument_groups:
- name: "--multiqc_versions"
type: file
direction: output
- - name: "--versions"
+
+ # Quantification (pseudo alignment)
+ - name: "--pseudo_counts_gene"
type: file
- must_exist: false
direction: output
-
+ default: pseudo_alignment_quantification/gene_counts.tsv
+ - name: "--pseudo_counts_gene_length_scaled"
+ type: file
+ direction: output
+ default: pseudo_alignment_quantification/gene_counts_length_scaled.tsv
+ - name: "--pseudo_counts_gene_scaled"
+ type: file
+ direction: output
+ default: pseudo_alignment_quantification/gene_counts_scaled.tsv
+ - name: "--pseudo_tpm_transcript"
+ type: file
+ direction: output
+ default: pseudo_alignment_quantification/transcript_tpm.tsv
+ - name: "--pseudo_counts_transcript"
+ type: file
+ direction: output
+ default: pseudo_alignment_quantification/transcript_counts.tsv
+ - name: "--pseudo_quant_merged_summarizedexperiment"
+ type: file
+ direction: output
+ default: pseudo_alignment_quantification/quant_merged_summarizedexperiment
+
resources:
- type: nextflow_script
path: main.nf
diff --git a/src/workflows/rnaseq/main.nf b/src/workflows/rnaseq/main.nf
index b13a5d7..001cb86 100644
--- a/src/workflows/rnaseq/main.nf
+++ b/src/workflows/rnaseq/main.nf
@@ -22,27 +22,27 @@ workflow run_wf {
| map { list ->
[ "ref",
- [ fasta: list[1][-1].fasta,
- gtf: list[1][-1].gtf,
- gff: list[1][-1].gff,
- additional_fasta: list[1][-1].additional_fasta,
- transcript_fasta: list[1][-1].transcript_fasta,
- gene_bed: list[1][-1].gene_bed,
- bbsplit_fasta_list: list[1][-1].bbsplit_fasta_list,
- aligner: list[1][-1].aligner,
- pseudo_aligner: list[1][-1].pseudo_aligner,
- star_index: list[1][-1].star_index,
- rsem_index: list[1][-1].rsem_index,
- salmon_index: list[1][-1].salmon_index,
- kallisto_index: list[1][-1].kallisto_index,
- // splicesites: list[1][-1].splicesites,
- // hisat2_index: list[1][-1].hisat2_index,
- bbsplit_index: list[1][-1].bbsplit_index,
- skip_bbsplit: list[1][-1].skip_bbsplit,
- gencode: list[1][-1].gencode,
- biotype: list[1][-1].biotype,
- filter_gtf: list[1][-1].filter_gtf,
- pseudo_aligner_kmer_size: list[1][-1].pseudo_aligner_kmer_size ]
+ [ fasta: list.collect { id, state -> state.fasta }.unique()[0], // NOTE(review): for every key below, unique()[0] keeps only the first distinct value across samples; conflicting per-sample values are dropped without warning
+ gtf: list.collect { id, state -> state.gtf }.unique()[0],
+ gff: list.collect { id, state -> state.gff }.unique()[0],
+ additional_fasta: list.collect { id, state -> state.additional_fasta }.unique()[0],
+ transcript_fasta:list.collect { id, state -> state.transcript_fasta }.unique()[0],
+ gene_bed: list.collect { id, state -> state.gene_bed }.unique()[0],
+ bbsplit_fasta_list: list.collect { id, state -> state.bbsplit_fasta_list }.unique()[0],
+ aligner: list.collect { id, state -> state.aligner }.unique()[0],
+ pseudo_aligner: list.collect { id, state -> state.pseudo_aligner }.unique()[0],
+ star_index: list.collect { id, state -> state.star_index }.unique()[0],
+ rsem_index: list.collect { id, state -> state.rsem_index }.unique()[0],
+ salmon_index: list.collect { id, state -> state.salmon_index }.unique()[0],
+ kallisto_index: list.collect { id, state -> state.kallisto_index }.unique()[0],
+ // splicesites: list.collect { id, state -> state.splicesites }.unique()[0],
+ // hisat2_index: list.collect { id, state -> state.hisat2_index }.unique()[0],
+ bbsplit_index: list.collect { id, state -> state.bbsplit_index }.unique()[0],
+ skip_bbsplit: list.collect { id, state -> state.skip_bbsplit }.unique()[0],
+ gencode: list.collect { id, state -> state.gencode }.unique()[0],
+ biotype: list.collect { id, state -> state.biotype }.unique()[0],
+ filter_gtf: list.collect { id, state -> state.filter_gtf }.unique()[0],
+ pseudo_aligner_kmer_size: list.collect { id, state -> state.pseudo_aligner_kmer_size }.unique()[0] ]
]
}
@@ -213,11 +213,14 @@ workflow run_wf {
"gtf_group_features": "gtf_group_features",
"gtf_extra_attributes": "gtf_extra_attributes",
"salmon_quant_libtype": "salmon_quant_libtype",
- "salmon_index": "salmon_index"
+ "salmon_index": "salmon_index",
+ "extra_rsem_calculate_expression_args": "extra_rsem_calculate_expression_args"
],
toState: [
"star_alignment": "star_alignment",
"star_multiqc": "star_multiqc",
+ "rsem_multiqc": "rsem_multiqc",
+ "salmon_multiqc": "salmon_multiqc",
"genome_bam_sorted": "genome_bam_sorted",
"genome_bam_index": "genome_bam_index",
"genome_bam_stats": "genome_bam_stats",
@@ -229,7 +232,11 @@ workflow run_wf {
"transcriptome_bam_flagstat": "transcriptome_bam_flagstat",
"transcriptome_bam_idxstats": "transcriptome_bam_idxstats",
"quant_out_dir": "quant_out_dir",
- "quant_results_file": "quant_results_file"
+ "quant_results_file": "quant_results_file",
+ "rsem_counts_gene": "rsem_counts_gene",
+ "rsem_counts_transcripts": "rsem_counts_transcripts",
+ "bam_genome_rsem": "bam_genome_rsem",
+ "bam_transcript_rsem": "bam_transcript_rsem"
]
)
@@ -239,7 +246,7 @@ workflow run_wf {
def passed_mapping = (percent_mapped >= state.min_mapped_reads) ? true : false
[ id, state + [percent_mapped: percent_mapped, passed_mapping: passed_mapping] ]
}
-
+
// Pseudo-alignment and quantification
| pseudo_alignment_and_quant.run (
runIf: { id, state -> !state.skip_pseudo_alignment && state.passed_trimmed_reads },
@@ -331,16 +338,23 @@ workflow run_wf {
"skip_align": "skip_alignment",
"skip_pseudo_align": "skip_pseudo_alignment",
"gtf": "gtf",
+ "num_trimmed_reads": "num_trimmed_reads",
+ "passed_trimmed_reads": "passed_trimmed_reads",
+ "passed_mapping": "passed_mapping",
+ "percent_mapped": "percent_mapped",
"genome_bam": "genome_bam_sorted",
"genome_bam_index": "genome_bam_index",
- "quant_out_dir": "quant_out_dir",
+ "salmon_multiqc": "salmon_multiqc",
"quant_results_file": "quant_results_file",
+ "rsem_multiqc": "rsem_multiqc",
+ "rsem_counts_gene": "rsem_counts_gene",
+ "rsem_counts_transcripts": "rsem_counts_transcripts",
+ "pseudo_multiqc": "pseudo_multiqc",
"pseudo_quant_out_dir": "pseudo_quant_out_dir",
"pseudo_salmon_quant_results_file": "pseudo_salmon_quant_results_file",
"pseudo_kallisto_quant_results_file": "pseudo_kallisto_quant_results_file",
"aligner": "aligner",
"pseudo_aligner": "pseudo_aligner",
- "pseudo_multiqc": "pseudo_multiqc",
"gene_bed": "gene_bed",
"extra_preseq_args": "extra_preseq_args",
"extra_featurecounts_args": "extra_featurecounts_args",
@@ -368,11 +382,7 @@ workflow run_wf {
"genome_bam_flagstat": "genome_bam_flagstat",
"genome_bam_idxstats": "genome_bam_idxstats",
"markduplicates_multiqc": "markduplicates_metrics",
- "rseqc_modules": "rseqc_modules",
- "num_trimmed_reads": "num_trimmed_reads",
- "passed_trimmed_reads": "passed_trimmed_reads",
- "passed_mapping": "passed_mapping",
- "percent_mapped": "percent_mapped"
+ "rseqc_modules": "rseqc_modules"
],
toState: [
"preseq_output": "preseq_output",
@@ -418,7 +428,7 @@ workflow run_wf {
"counts_gene_scaled": "counts_gene_scaled",
"tpm_transcript": "tpm_transcript",
"counts_transcript": "counts_transcript",
- "salmon_merged_summarizedexperiment": "salmon_merged_summarizedexperiment",
+ "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment",
"deseq2_output": "deseq2_output",
"multiqc_report": "multiqc_report",
"multiqc_data": "multiqc_data",
@@ -457,15 +467,17 @@ workflow run_wf {
"star_alignment": "star_alignment",
"genome_bam_sorted": "genome_bam_sorted",
"genome_bam_index": "genome_bam_index",
- "genome_bam_stats": "samtools_stats",
- "genome_bam_flagstat": "samtools_flagstat",
- "genome_bam_idxstats": "samtools_idxstats",
+ "genome_bam_stats": "genome_bam_stats",
+ "genome_bam_flagstat": "genome_bam_flagstat",
+ "genome_bam_idxstats": "genome_bam_idxstats",
"transcriptome_bam": "transcriptome_bam",
"transcriptome_bam_index": "transcriptome_bam_index",
"transcriptome_bam_stats": "transcriptome_bam_stats",
"transcriptome_bam_flagstat": "transcriptome_bam_flagstat",
"transcriptome_bam_idxstats": "transcriptome_bam_idxstats",
- "salmon_quant_results": "salmon_quant_results",
+ "salmon_quant_results": "quant_out_dir",
+ "pseudo_quant_results": "pseudo_quant_out_dir",
+ "markduplicates_metrics": "markduplicates_metrics",
"stringtie_transcript_gtf": "stringtie_transcript_gtf",
"stringtie_coverage_gtf": "stringtie_coverage_gtf",
"stringtie_abundance": "stringtie_abundance",
@@ -531,8 +543,7 @@ workflow run_wf {
"deseq2_output_pseudo": "deseq2_output_pseudo",
"multiqc_report": "multiqc_report",
"multiqc_data": "multiqc_data",
- "multiqc_plots": "multiqc_plots",
- "multiqc_versions": "multiqc_versions"
+ "multiqc_plots": "multiqc_plots"
]
)
@@ -611,7 +622,7 @@ def getFastpReadsAfterFiltering(json_file) {
}
//
-// Function that parses and returns the alignment rate from the STAR log output
+// Function that parses and returns the alignment rate from the STAR log outputs
//
def getStarPercentMapped(align_log) {
def percent_aligned = 0
diff --git a/src/workflows/rnaseq/test_run.sh b/src/workflows/rnaseq/test_run.sh
index 364d2ec..ed9ed02 100755
--- a/src/workflows/rnaseq/test_run.sh
+++ b/src/workflows/rnaseq/test_run.sh
@@ -1,30 +1,34 @@
#!/bin/bash
-viash ns build --setup cb --parallel
+# viash ns build --setup cb --parallel
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
id,fastq_1,fastq_2,strandedness
WT_REP1,SRR6357070_1.fastq.gz;SRR6357071_1.fastq.gz,SRR6357070_2.fastq.gz;SRR6357071_2.fastq.gz,reverse
+WT_REP2,SRR6357072_1.fastq.gz,SRR6357072_2.fastq.gz,reverse
+RAP1_IAA_30M_REP1,SRR6357076_1.fastq.gz,SRR6357076_2.fastq.gz,reverse
RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
+RAP1_UNINDUCED_REP2,SRR6357074_1.fastq.gz;SRR6357075_1.fastq.gz,,reverse
HERE
echo ">> Test 1: Trimming reads with Trim galore; alignment with STAR and quantification with Salmon"
nextflow run target/nextflow/workflows/rnaseq/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
- --publish_dir "test_results/full_pipeline_test1" \
+ --publish_dir test_results/full_pipeline_test1 \
--fasta testData/minimal_test/reference/genome.fasta \
--gtf testData/minimal_test/reference/genes.gtf.gz \
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
--skip_pseudo_alignment \
- -profile docker --resume
+ -profile docker \
+ -resume  # single dash: Nextflow run option ("--resume" would only set params.resume)
# echo ">> Test 2: Trimming reads with Trim galore; alignment with STAR and quantification with Salmon; pseudo-alignment and quantification with Kallisto"
# nextflow run target/nextflow/workflows/rnaseq/main.nf \
# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
-# --publish_dir "test_results/full_pipeline_test2" \
+# --publish_dir test_results/full_pipeline_test2 \
# --fasta testData/minimal_test/reference/genome.fasta \
# --gtf testData/minimal_test/reference/genes.gtf.gz \
# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
@@ -35,10 +39,11 @@ nextflow run target/nextflow/workflows/rnaseq/main.nf \
# --kallisto_quant_fragment_length_sd 10 \
# -profile docker --resume
+
# echo ">> Test 3: Trimming reads with fastp; skip alignment; pseudo alignment and quantification with Salmon"
# nextflow run target/nextflow/workflows/rnaseq/main.nf \
# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
-# --publish_dir "test_results/full_pipeline_test3" \
+# --publish_dir test_results/full_pipeline_test3 \
# --fasta testData/minimal_test/reference/genome.fasta \
# --gtf testData/minimal_test/reference/genes.gtf.gz \
# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
@@ -46,4 +51,19 @@ nextflow run target/nextflow/workflows/rnaseq/main.nf \
# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
# --trimmer fastp \
# --skip_alignment \
-# -profile docker --resume
\ No newline at end of file
+# -profile docker -resume
+
+
+# echo ">> Test 4: Trimming reads with Trim galore; alignment and quantification with RSEM (STAR)"
+# nextflow run target/nextflow/workflows/rnaseq/main.nf \
+# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
+# --publish_dir test_results/full_pipeline_test4 \
+# --fasta testData/minimal_test/reference/genome.fasta \
+# --gtf testData/minimal_test/reference/genes.gtf.gz \
+# --additional_fasta testData/minimal_test/reference/gfp.fa.gz \
+# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
+# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
+# --aligner star_rsem \
+# --skip_pseudo_alignment \
+# -profile docker \
+# -resume
diff --git a/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml
new file mode 100644
index 0000000..ef8d1db
--- /dev/null
+++ b/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml
@@ -0,0 +1,475 @@
+name: "umi_tools_extract"
+namespace: "umi_tools"
+version: "v0.2.0"
+argument_groups:
+- name: "Input"
+ arguments:
+ - type: "file"
+ name: "--input"
+ description: "File containing the input data."
+ info: null
+ example:
+ - "sample.fastq"
+ must_exist: true
+ create_parent: true
+ required: true
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--read2_in"
+ description: "File containing the input data for the R2 reads (if paired). If\
+ \ provided, a need to be provided."  # NOTE(review): a word is missing after "a" (probably an option name lost during generation) — restore from the upstream component config
+ info: null
+ example:
+ - "sample_R2.fastq"
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--bc_pattern"
+ alternatives:
+ - "-p"
+ description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\
+ \ first 6 nucleotides \nof the read are from the UMI.\n"
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--bc_pattern2"
+ description: "The UMI barcode pattern to use for read 2."
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+- name: "Output"
+ arguments:
+ - type: "file"
+ name: "--output"
+ description: "Output file for read 1."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: true
+ direction: "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--read2_out"
+ description: "Output file for read 2."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--filtered_out"
+ description: "Write out reads not matching regex pattern or cell barcode whitelist\
+ \ to this file.\n"
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "input"  # NOTE(review): declared as input although listed under "Output" and described as a file that is written — confirm whether this should be "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--filtered_out2"
+ description: "Write out read pairs not matching regex pattern or cell barcode\
+ \ whitelist to this file.\n"
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "input"  # NOTE(review): declared as input although listed under "Output" and described as a file that is written — confirm whether this should be "output"
+ multiple: false
+ multiple_sep: ";"
+- name: "Extract Options"
+ arguments:
+ - type: "string"
+ name: "--extract_method"
+ description: "UMI pattern to use. Default: `string`.\n"
+ info: null
+ example:
+ - "string"
+ required: false
+ choices:
+ - "string"
+ - "regex"
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "boolean_true"
+ name: "--error_correct_cell"
+ description: "Error correct cell barcodes to the whitelist."
+ info: null
+ direction: "input"
+ - type: "file"
+ name: "--whitelist"
+ description: "Whitelist of accepted cell barcodes tab-separated format, where\
+ \ column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated)\
+ \ of other cell barcodes which should \nbe corrected to the barcode in column\
+ \ 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n"
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--blacklist"
+ description: "Blacklist of cell barcodes to discard."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "integer"
+ name: "--subset_reads"
+ description: "Only parse the first N reads."
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "integer"
+ name: "--quality_filter_threshold"
+ description: "Remove reads where any UMI base quality score falls below this threshold."
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--quality_filter_mask"
+ description: "If a UMI base has a quality below this threshold, replace the base\
+ \ with 'N'.\n"
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--quality_encoding"
+ description: "Quality score encoding. Choose from:\n * phred33 [33-77]\n * phred64\
+ \ [64-106]\n * solexa [59-106]\n"
+ info: null
+ required: false
+ choices:
+ - "phred33"
+ - "phred64"
+ - "solexa"
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "boolean_true"
+ name: "--reconcile_pairs"
+ description: "Allow read 2 infile to contain reads not in read 1 infile. This\
+ \ enables support for upstream protocols\nwhere read one contains cell barcodes,\
+ \ and the read pairs have been filtered and corrected without regard\nto the\
+ \ read2.\n"
+ info: null
+ direction: "input"
+ - type: "boolean_true"
+ name: "--three_prime"
+ alternatives:
+ - "--3prime"
+ description: "By default the barcode is assumed to be on the 5' end of the read,\
+ \ but use this option to specify that it is\non the 3' end instead. This option\
+ \ only works with --extract_method=string since 3' encoding can be specified\n\
+ explicitly with a regex, e.g `.*(?P<umi_1>.{5})$`.\n"
+ info: null
+ direction: "input"
+ - type: "boolean_true"
+ name: "--ignore_read_pair_suffixes"
+ description: "Ignore \"/1\" and \"/2\" read name suffixes. Note that this option\
+ \ is required if the suffixes are not whitespace\nseparated from the rest of\
+ \ the read name.\n"
+ info: null
+ direction: "input"
+ - type: "string"
+ name: "--umi_separator"
+ description: "The character that separates the UMI in the read name. Most likely\
+ \ a colon if you skipped the extraction with\nUMI-tools and used other software.\
+ \ Default: `_`\n"
+ info: null
+ example:
+ - "_"
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--grouping_method"
+ description: "Method to use to determine read groups by subsuming those with similar\
+ \ UMIs. All methods start by identifying\nthe reads with the same mapping position,\
+ \ but treat similar yet nonidentical UMIs differently. Default: `directional`\n"
+ info: null
+ example:
+ - "directional"
+ required: false
+ choices:
+ - "unique"
+ - "percentile"
+ - "cluster"
+ - "adjacency"
+ - "directional"
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "integer"
+ name: "--umi_discard_read"
+ description: "After UMI barcode extraction discard either R1 or R2 by setting\
+ \ this parameter to 1 or 2, respectively. Default: `0`\n"
+ info: null
+ example:
+ - 0
+ required: false
+ choices:
+ - 0
+ - 1
+ - 2
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+- name: "Common Options"
+ arguments:
+ - type: "file"
+ name: "--log"
+ description: "File with logging information."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "boolean_true"
+ name: "--log2stderr"
+ description: "Send logging information to stderr."
+ info: null
+ direction: "output"
+ - type: "integer"
+ name: "--verbose"
+ description: "Log level. The higher, the more output."
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--error"
+ description: "File with error information."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--temp_dir"
+ description: "Directory for temporary files. If not set, the bash environmental\
+ \ variable TMPDIR is used.\n"
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "integer"
+ name: "--compresslevel"
+ description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\
+ \ than python gzip default (which is 9).\nDefault `6`.\n"
+ info: null
+ example:
+ - 6
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "file"
+ name: "--timeit"
+ description: "Store timing information in file."
+ info: null
+ must_exist: true
+ create_parent: true
+ required: false
+ direction: "output"
+ multiple: false
+ multiple_sep: ";"
+ - type: "string"
+ name: "--timeit_name"
+ description: "Name in timing file for this class of jobs."
+ info: null
+ default:
+ - "all"
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+ - type: "boolean_true"
+ name: "--timeit_header"
+ description: "Add header for timing information."
+ info: null
+ direction: "input"
+ - type: "integer"
+ name: "--random_seed"
+ description: "Random seed to initialize number generator with."
+ info: null
+ required: false
+ direction: "input"
+ multiple: false
+ multiple_sep: ";"
+resources:
+- type: "bash_script"
+ path: "script.sh"
+ is_executable: true
+description: "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed\
+ \ and appended to the read name. Any other barcode, for example a library barcode,\n\
+ is left on the read. Can also filter reads by quality or against a whitelist.\n"
+test_resources:
+- type: "bash_script"
+ path: "test.sh"
+ is_executable: true
+- type: "file"
+ path: "test_data"
+info: null
+status: "enabled"
+requirements:
+ commands:
+ - "ps"
+keywords:
+- "extract"
+- "umi-tools"
+- "umi"
+- "fastq"
+license: "MIT"
+references:
+ doi:
+ - "10.1101/gr.209601.116"
+links:
+ repository: "https://github.com/CGATOxford/UMI-tools"
+ homepage: "https://umi-tools.readthedocs.io/en/latest/"
+ documentation: "https://umi-tools.readthedocs.io/en/latest/reference/extract.html"
+runners:
+- type: "executable"
+ id: "executable"
+ docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+ id: "nextflow"
+ directives:
+ tag: "$id"
+ auto:
+ simplifyInput: true
+ simplifyOutput: false
+ transcript: false
+ publish: false
+ config:
+ labels:
+ mem1gb: "memory = 1000000000.B"
+ mem2gb: "memory = 2000000000.B"
+ mem5gb: "memory = 5000000000.B"
+ mem10gb: "memory = 10000000000.B"
+ mem20gb: "memory = 20000000000.B"
+ mem50gb: "memory = 50000000000.B"
+ mem100gb: "memory = 100000000000.B"
+ mem200gb: "memory = 200000000000.B"
+ mem500gb: "memory = 500000000000.B"
+ mem1tb: "memory = 1000000000000.B"
+ mem2tb: "memory = 2000000000000.B"
+ mem5tb: "memory = 5000000000000.B"
+ mem10tb: "memory = 10000000000000.B"
+ mem20tb: "memory = 20000000000000.B"
+ mem50tb: "memory = 50000000000000.B"
+ mem100tb: "memory = 100000000000000.B"
+ mem200tb: "memory = 200000000000000.B"
+ mem500tb: "memory = 500000000000000.B"
+ mem1gib: "memory = 1073741824.B"
+ mem2gib: "memory = 2147483648.B"
+ mem4gib: "memory = 4294967296.B"
+ mem8gib: "memory = 8589934592.B"
+ mem16gib: "memory = 17179869184.B"
+ mem32gib: "memory = 34359738368.B"
+ mem64gib: "memory = 68719476736.B"
+ mem128gib: "memory = 137438953472.B"
+ mem256gib: "memory = 274877906944.B"
+ mem512gib: "memory = 549755813888.B"
+ mem1tib: "memory = 1099511627776.B"
+ mem2tib: "memory = 2199023255552.B"
+ mem4tib: "memory = 4398046511104.B"
+ mem8tib: "memory = 8796093022208.B"
+ mem16tib: "memory = 17592186044416.B"
+ mem32tib: "memory = 35184372088832.B"
+ mem64tib: "memory = 70368744177664.B"
+ mem128tib: "memory = 140737488355328.B"
+ mem256tib: "memory = 281474976710656.B"
+ mem512tib: "memory = 562949953421312.B"
+ cpu1: "cpus = 1"
+ cpu2: "cpus = 2"
+ cpu5: "cpus = 5"
+ cpu10: "cpus = 10"
+ cpu20: "cpus = 20"
+ cpu50: "cpus = 50"
+ cpu100: "cpus = 100"
+ cpu200: "cpus = 200"
+ cpu500: "cpus = 500"
+ cpu1000: "cpus = 1000"
+ debug: false
+ container: "docker"
+engines:
+- type: "docker"
+ id: "docker"
+ image: "quay.io/biocontainers/umi_tools:1.1.4--py310h4b81fae_2"
+ target_registry: "images.viash-hub.com"
+ target_tag: "v0.2.0"
+ namespace_separator: "/"
+ setup:
+ - type: "docker"
+ run:
+ - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n"
+ entrypoint: []
+ cmd: null
+- type: "native"
+ id: "native"
+build_info:
+ config: "src/umi_tools/umi_tools_extract/config.vsh.yaml"
+ runner: "nextflow"
+ engine: "docker|native"
+ output: "target/nextflow/umi_tools/umi_tools_extract"
+ executable: "target/nextflow/umi_tools/umi_tools_extract/main.nf"
+ viash_version: "0.9.0"
+ git_commit: "7e530218844c373048bc33de58f021b6460642e5"
+ git_remote: "https://github.com/viash-hub/biobox"  # credential-free URL; never commit access tokens in build metadata
+package_config:
+ name: "biobox"
+ version: "v0.2.0"
+ description: "A collection of bioinformatics tools for working with sequence data.\n"
+ info: null
+ viash_version: "0.9.0"
+ source: "src"
+ target: "target"
+ config_mods:
+ - ".requirements.commands := ['ps']\n"
+ - ".engines += { type: \"native\" }"
+ - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+ - ".engines[.type == 'docker'].target_tag := 'v0.2.0'"
+ keywords:
+ - "bioinformatics"
+ - "modules"
+ - "sequencing"
+ license: "MIT"
+ organization: "vsh"
+ links:
+ repository: "https://github.com/viash-hub/biobox"
+ issue_tracker: "https://github.com/viash-hub/biobox/issues"
diff --git a/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/main.nf b/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/main.nf
new file mode 100644
index 0000000..6fc55be
--- /dev/null
+++ b/target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/umi_tools/umi_tools_extract/main.nf
@@ -0,0 +1,3983 @@
+// umi_tools_extract v0.2.0
+//
+// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
+// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+// Intuitive.
+//
+// The component may contain files which fall under a different license. The
+// authors of this component should specify the license in the header of such
+// files, or include a separate license file detailing the licenses of all included
+// files.
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf'
+// Thrown when an argument value does not have (and could not be cast to) the type
+// declared for it. The parts of the message are also kept as properties, so that
+// code catching the exception can read e.g. expectedClass and foundClass.
+class UnexpectedArgumentTypeException extends Exception {
+ String errorIdentifier
+ String stage
+ String plainName
+ String expectedClass
+ String foundClass
+
+ // errorIdentifier and stage are optional: each is only added to the message
+ // when it is set (Groovy truth), so a null or empty value leaves no gap.
+ UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
+ super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " +
+ "Expected type: ${expectedClass}. Found type: ${foundClass}")
+ this.errorIdentifier = errorIdentifier
+ this.stage = stage
+ this.plainName = plainName
+ this.expectedClass = expectedClass
+ this.foundClass = foundClass
+ }
+}
+
+/**
+ * Checks if the given value is of the type declared for the argument, first casting
+ * it to that type where a conversion is defined. If not, an exception is thrown.
+ *
+ * @param stage The stage of the argument (input or output)
+ * @param par The parameter definition
+ * @param value The value to check
+ * @param errorIdentifier The identifier to use in the error message
+ * @return The value (possibly cast), if it is of the expected type; null is
+ * returned unchanged for an argument that is not required
+ * @throws UnexpectedArgumentTypeException If the value is not of the expected type
+*/
+def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) {
+ // expectedClass will only be != null if value is not of the expected type
+ def expectedClass = null
+ def foundClass = null
+
+ // todo: split if need be
+
+ if (!par.required && value == null) {
+ // a missing value is acceptable for an argument that is not required
+ expectedClass = null
+ } else if (par.multiple) {
+ // a single value is treated as a list of one element
+ if (value !instanceof Collection) {
+ value = [value]
+ }
+
+ // split strings
+ // (String.split interprets par.multiple_sep as a regular expression)
+ value = value.collectMany{ val ->
+ if (val instanceof String) {
+ // collect() to ensure that the result is a List and not simply an array
+ val.split(par.multiple_sep).collect()
+ } else {
+ [val]
+ }
+ }
+
+ // process globs
+ // (file() may return several matches for one string, hence the flatten())
+ if (par.type == "file" && par.direction == "input") {
+ value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten()
+ }
+
+ // check types of elements in list
+ // each element is checked as a single-valued argument; a failure on any
+ // element is reported below as a mismatch of the whole list
+ try {
+ value = value.collect { listVal ->
+ _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier)
+ }
+ } catch (UnexpectedArgumentTypeException e) {
+ expectedClass = "List[${e.expectedClass}]"
+ foundClass = "List[${e.foundClass}]"
+ }
+ } else if (par.type == "string") {
+ // cast to string if need be
+ if (value instanceof GString) {
+ value = value.toString()
+ }
+ expectedClass = value instanceof String ? null : "String"
+ } else if (par.type == "integer") {
+ // cast to integer if need be
+ if (value instanceof String) {
+ try {
+ value = value.toInteger()
+ } catch (NumberFormatException e) {
+ // do nothing
+ // (value stays a String, so the instanceof check below reports the mismatch)
+ }
+ }
+ if (value instanceof java.math.BigInteger) {
+ value = value.intValue()
+ }
+ expectedClass = value instanceof Integer ? null : "Integer"
+ } else if (par.type == "long") {
+ // cast to long if need be
+ if (value instanceof String) {
+ try {
+ value = value.toLong()
+ } catch (NumberFormatException e) {
+ // do nothing
+ // (value stays a String, so the instanceof check below reports the mismatch)
+ }
+ }
+ if (value instanceof Integer) {
+ value = value.toLong()
+ }
+ expectedClass = value instanceof Long ? null : "Long"
+ } else if (par.type == "double") {
+ // cast to double if need be
+ if (value instanceof String) {
+ try {
+ value = value.toDouble()
+ } catch (NumberFormatException e) {
+ // do nothing
+ // (value stays a String, so the instanceof check below reports the mismatch)
+ }
+ }
+ if (value instanceof java.math.BigDecimal) {
+ value = value.doubleValue()
+ }
+ if (value instanceof Float) {
+ value = value.toDouble()
+ }
+ expectedClass = value instanceof Double ? null : "Double"
+ } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") {
+ // cast to boolean if need be
+ // (only "true" / "false" in any letter case are converted; other strings are a mismatch)
+ if (value instanceof String) {
+ def valueLower = value.toLowerCase()
+ if (valueLower == "true") {
+ value = true
+ } else if (valueLower == "false") {
+ value = false
+ }
+ }
+ expectedClass = value instanceof Boolean ? null : "Boolean"
+ } else if (par.type == "file" && (par.direction == "input" || stage == "output")) {
+ // input files, and any file checked at the output stage, must end up as a Path
+ // cast to path if need be
+ if (value instanceof String) {
+ value = file(value, hidden: true)
+ }
+ if (value instanceof File) {
+ value = value.toPath()
+ }
+ expectedClass = value instanceof Path ? null : "Path"
+ } else if (par.type == "file" && stage == "input" && par.direction == "output") {
+ // an output file passed in at the input stage is kept as a String
+ // cast to string if need be
+ if (value instanceof GString) {
+ value = value.toString()
+ }
+ expectedClass = value instanceof String ? null : "String"
+ } else {
+ // didn't find a match for par.type
+ // (setting expectedClass makes the check below throw)
+ expectedClass = par.type
+ }
+
+ if (expectedClass != null) {
+ // foundClass is already set when the mismatch came from a list element
+ if (foundClass == null) {
+ foundClass = value.getClass().getName()
+ }
+ throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass)
+ }
+
+ return value
+}
+// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf'
+/**
+ * Validates the input values passed to a module and casts them to their declared types.
+ *
+ * @param inputs The input values, keyed by plain argument name
+ * @param config The component configuration; its allArguments entries are consulted
+ * @param id The id of the current item, used in error messages
+ * @param key The key of the module, used in error messages
+ * @return The inputs with every value type-checked; returned untouched on a stub run
+ */
+Map _processInputValues(Map inputs, Map config, String id, String key) {
+  // validation is skipped entirely for stub runs
+  if (workflow.stubRun) {
+    return inputs
+  }
+
+  // every required argument must be present with a non-null value
+  for (requiredArg in config.allArguments.findAll { it.required }) {
+    def requiredName = requiredArg.plainName
+    assert inputs.containsKey(requiredName) && inputs.get(requiredName) != null :
+      "Error in module '${key}' id '${id}': required input argument '${requiredName}' is missing"
+  }
+
+  // each provided value must belong to a known argument and have the right type
+  return inputs.collectEntries { name, value ->
+    def par = config.allArguments.find { candidate ->
+      candidate.plainName == name && (candidate.direction == "input" || candidate.type == "file")
+    }
+    assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument"
+
+    [ name, _checkArgumentType("input", par, value, "in module '$key' id '$id'") ]
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
+/**
+ * Validates the output values produced by a module and casts them to their declared types.
+ *
+ * @param outputs The output values, keyed by plain argument name
+ * @param config The component configuration; its allArguments entries are consulted
+ * @param id The id of the current item, used in error messages
+ * @param key The key of the module, used in error messages
+ * @return The outputs with every value type-checked; returned untouched on a stub run
+ */
+Map _processOutputValues(Map outputs, Map config, String id, String key) {
+  // validation is skipped entirely for stub runs
+  if (workflow.stubRun) {
+    return outputs
+  }
+
+  // every required output argument must be present with a non-null value
+  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
+  for (requiredArg in requiredOutputs) {
+    def requiredName = requiredArg.plainName
+    assert outputs.containsKey(requiredName) && outputs.get(requiredName) != null :
+      "Error in module '${key}' id '${id}': required output argument '${requiredName}' is missing"
+  }
+
+  // each produced value must belong to a known output argument and have the right type
+  return outputs.collectEntries { name, value ->
+    def par = config.allArguments.find { candidate ->
+      candidate.plainName == name && candidate.direction == "output"
+    }
+    assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
+
+    [ name, _checkArgumentType("output", par, value, "in module '$key' id '$id'") ]
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
+// Keeps track of the ids that have been observed so far. Access to the set is
+// guarded by the read/write lock annotations on the methods.
+class IDChecker {
+  final def items = [] as Set
+
+  // Registers an item; returns true only if it had not been observed before.
+  @groovy.transform.WithWriteLock
+  boolean observe(String item) {
+    // Set.add reports whether the element was newly inserted
+    return items.add(item)
+  }
+
+  // Tells whether an item has already been observed.
+  @groovy.transform.WithReadLock
+  boolean contains(String item) {
+    items.contains(item)
+  }
+
+  // Returns a clone of the observed items rather than the set itself.
+  @groovy.transform.WithReadLock
+  Set getItems() {
+    items.clone()
+  }
+}
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf'
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets; the first element of each set is its id.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The plain unique() would remove the
+  // duplicates from ppIds itself, so the message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf'
+
+// helper functions for reading params from file //
+
+/**
+ * Resolves a path mentioned inside a params file relative to that file.
+ *
+ * @param parent Path of the file that mentions the child
+ * @param child The path as written in the parent file
+ * @return child itself when it contains "://" or is an absolute path; otherwise
+ *   the absolute directory of parent followed by child
+ */
+def _getChild(parent, child) {
+  // URLs are tested first so that they are never handed to Paths.get
+  def alreadyResolved = child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()
+  if (alreadyResolved) {
+    return child
+  }
+
+  // drop the file name of the parent, keeping the trailing slash of its directory
+  def parentDir = java.nio.file.Paths.get(parent)
+    .toAbsolutePath()
+    .toString()
+    .replaceAll('/[^/]*$', "/")
+  return parentDir + child
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf'
+/**
+ * Guess the format of a param list from its type and file extension
+ *
+ * @param param_list Either a String (a path to a parameter list file, or anything
+ *   else, which is then treated as a yaml blob) or a non-String value.
+ *
+ * @return "asis" for a non-String value, "csv", "json" or "yaml" for a String with
+ *   a matching extension, and "yaml_blob" for any other String.
+ */
+def _paramListGuessFormat(param_list) {
+  // anything that is not a String is taken as is
+  if (!(param_list instanceof String)) {
+    return "asis"
+  }
+
+  def formatByExtension = [
+    ".csv": "csv",
+    ".json": "json",
+    ".jsn": "json",
+    ".yaml": "yaml",
+    ".yml": "yaml"
+  ]
+  def match = formatByExtension.find { extension, format -> param_list.endsWith(extension) }
+
+  // a String without a known extension is assumed to be a yaml blob
+  return match ? match.value : "yaml_blob"
+}
+
+
+/**
+ * Read the param list
+ *
+ * @param param_list One of the following:
+ * - A String containing the path to the parameter list file (csv, json or yaml),
+ * - A yaml blob of a list of maps (yaml_blob),
+ * - Or a groovy list of maps (asis).
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A List of Maps containing the parameters.
+ */
+def _parseParamList(param_list, Map config) {
+ // first determine format by extension
+ def paramListFormat = _paramListGuessFormat(param_list)
+
+ def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ?
+ file(param_list, hidden: true) :
+ null
+
+ // get the correct parser function for the detected params_list format
+ def paramSets = []
+ if (paramListFormat == "asis") {
+ paramSets = param_list
+ } else if (paramListFormat == "yaml_blob") {
+ paramSets = readYamlBlob(param_list)
+ } else if (paramListFormat == "yaml") {
+ paramSets = readYaml(paramListPath)
+ } else if (paramListFormat == "json") {
+ paramSets = readJson(paramListPath)
+ } else if (paramListFormat == "csv") {
+ paramSets = readCsv(paramListPath)
+ } else {
+ error "Format of provided --param_list not recognised.\n" +
+ "Found: '$paramListFormat'.\n" +
+ "Expected: a csv file, a json file, a yaml file,\n" +
+ "a yaml blob or a groovy list of maps."
+ }
+
+ // data checks
+ assert paramSets instanceof List: "--param_list should contain a list of maps"
+ for (value in paramSets) {
+ assert value instanceof Map: "--param_list should contain a list of maps"
+ }
+
+ // id is argument
+ def idIsArgument = config.allArguments.any{it.plainName == "id"}
+
+ // Reformat from List